DeConveil 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,284 @@
+ from typing import Literal
+ from typing import Optional
+ from typing import Tuple
+
+ import numpy as np
+ import pandas as pd
+ from joblib import Parallel  # type: ignore
+ from joblib import delayed
+ from joblib import parallel_backend
+ from scipy.optimize import minimize  # type: ignore
+
+ from deconveil import inference
+ from deconveil import utils_CNaware
+ from deconveil.utils_CNaware import fit_lin_mu
+
+ from pydeseq2 import utils
+ from pydeseq2.utils import get_num_processes
+ from pydeseq2.utils import fit_alpha_mle
+ from pydeseq2.utils import wald_test
+
+
+
+ class DefInference(inference.Inference):
+
+     """Default DESeq2-related inference methods, using scipy/sklearn/numpy.
+
+     This object contains the interface to DeConveil's default copy-number-aware
+     inference routines and uses joblib internally for parallelization. Inherit
+     this class or its parent to write custom inference routines.
+
+     Parameters
+     ----------
+     joblib_verbosity : int
+         The verbosity level for joblib tasks. The higher the value, the more
+         updates are reported. (default: ``0``).
+     batch_size : int
+         Number of tasks to allocate to each joblib parallel worker. (default: ``128``).
+     n_cpus : int
+         Number of CPUs to use. If ``None``, all available CPUs will be used.
+         (default: ``None``).
+     backend : str
+         Joblib backend. (default: ``"loky"``).
+     """
+
+     fit_rough_dispersions = staticmethod(utils_CNaware.fit_rough_dispersions)  # type: ignore
+     fit_moments_dispersions2 = staticmethod(utils_CNaware.fit_moments_dispersions2)  # type: ignore
+
+     def __init__(
+         self,
+         joblib_verbosity: int = 0,
+         batch_size: int = 128,
+         n_cpus: Optional[int] = None,
+         backend: str = "loky",
+     ):
+         self._joblib_verbosity = joblib_verbosity
+         self._batch_size = batch_size
+         self._n_cpus = utils.get_num_processes(n_cpus)
+         self._backend = backend
+
+     @property
+     def n_cpus(self) -> int:  # noqa: D102
+         return self._n_cpus
+
+     @n_cpus.setter
+     def n_cpus(self, n_cpus: int) -> None:
+         self._n_cpus = utils.get_num_processes(n_cpus)
+
+     def lin_reg_mu(  # noqa: D102
+         self,
+         counts: np.ndarray,
+         size_factors: np.ndarray,
+         design_matrix: np.ndarray,
+         min_mu: float,
+     ) -> np.ndarray:
+         with parallel_backend(self._backend, inner_max_num_threads=1):
+             mu_hat_ = np.array(
+                 Parallel(
+                     n_jobs=self.n_cpus,
+                     verbose=self._joblib_verbosity,
+                     batch_size=self._batch_size,
+                 )(
+                     delayed(utils_CNaware.fit_lin_mu)(
+                         counts=counts[:, i],
+                         size_factors=size_factors,
+                         design_matrix=design_matrix,
+                         min_mu=min_mu,
+                     )
+                     for i in range(counts.shape[1])
+                 )
+             )
+         return mu_hat_.T
+
+     def irls_glm(
+         self,
+         counts: np.ndarray,
+         size_factors: np.ndarray,
+         design_matrix: np.ndarray,
+         disp: np.ndarray,
+         cnv: np.ndarray,
+         min_mu: float,
+         beta_tol: float,
+         min_beta: float = -30,
+         max_beta: float = 30,
+         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
+         maxiter: int = 250,
+     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+         with parallel_backend(self._backend, inner_max_num_threads=1):
+             res = Parallel(
+                 n_jobs=self.n_cpus,
+                 verbose=self._joblib_verbosity,
+                 batch_size=self._batch_size,
+             )(
+                 delayed(utils_CNaware.irls_glm)(
+                     counts=counts[:, i],
+                     size_factors=size_factors,
+                     design_matrix=design_matrix,
+                     disp=disp[i],
+                     cnv=cnv[:, i],
+                     min_mu=min_mu,
+                     beta_tol=beta_tol,
+                     min_beta=min_beta,
+                     max_beta=max_beta,
+                     optimizer=optimizer,
+                     maxiter=maxiter,
+                 )
+                 for i in range(counts.shape[1])
+             )
+         res = zip(*res)
+         MLE_lfcs_, mu_hat_, hat_diagonals_, converged_ = (np.array(m) for m in res)
+
+         return (
+             MLE_lfcs_,
+             mu_hat_.T,
+             hat_diagonals_.T,
+             converged_,
+         )
+
+     def alpha_mle(  # noqa: D102
+         self,
+         counts: np.ndarray,
+         design_matrix: np.ndarray,
+         mu: np.ndarray,
+         alpha_hat: np.ndarray,
+         min_disp: float,
+         max_disp: float,
+         prior_disp_var: Optional[float] = None,
+         cr_reg: bool = True,
+         prior_reg: bool = False,
+         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
+     ) -> Tuple[np.ndarray, np.ndarray]:
+         with parallel_backend(self._backend, inner_max_num_threads=1):
+             res = Parallel(
+                 n_jobs=self.n_cpus,
+                 verbose=self._joblib_verbosity,
+                 batch_size=self._batch_size,
+             )(
+                 delayed(utils.fit_alpha_mle)(
+                     counts=counts[:, i],
+                     design_matrix=design_matrix,
+                     mu=mu[:, i],
+                     alpha_hat=alpha_hat[i],
+                     min_disp=min_disp,
+                     max_disp=max_disp,
+                     prior_disp_var=prior_disp_var,
+                     cr_reg=cr_reg,
+                     prior_reg=prior_reg,
+                     optimizer=optimizer,
+                 )
+                 for i in range(counts.shape[1])
+             )
+         res = zip(*res)
+         dispersions_, l_bfgs_b_converged_ = (np.array(m) for m in res)
+         return dispersions_, l_bfgs_b_converged_
+
+
+     def dispersion_trend_gamma_glm(  # noqa: D102
+         self, covariates: pd.Series, targets: pd.Series
+     ) -> Tuple[np.ndarray, np.ndarray, bool]:
+         covariates_w_intercept = covariates.to_frame()
+         covariates_w_intercept.insert(0, "intercept", 1)
+         covariates_fit = covariates_w_intercept.values
+         targets_fit = targets.values
+
+         def loss(coeffs):
+             mu = covariates_fit @ coeffs
+             return np.nanmean(targets_fit / mu + np.log(mu), axis=0)
+
+         def grad(coeffs):
+             mu = covariates_fit @ coeffs
+             return -np.nanmean(
+                 ((targets_fit / mu - 1)[:, None] * covariates_fit) / mu[:, None], axis=0
+             )
+
+         try:
+             res = minimize(
+                 loss,
+                 x0=np.array([1.0, 1.0]),
+                 jac=grad,
+                 method="L-BFGS-B",
+                 bounds=[(1e-12, np.inf)] * 2,  # L-BFGS-B needs one (low, high) pair per coefficient
+             )
+         except RuntimeWarning:  # Could happen if the coefficients fall to zero
+             return np.array([np.nan, np.nan]), np.array([np.nan, np.nan]), False
+
+         coeffs = res.x
+         return coeffs, covariates_fit @ coeffs, res.success
+
+
+     def wald_test(  # noqa: D102
+         self,
+         design_matrix: np.ndarray,
+         disp: np.ndarray,
+         lfc: np.ndarray,
+         mu: np.ndarray,
+         ridge_factor: np.ndarray,
+         contrast: np.ndarray,
+         lfc_null: np.ndarray,
+         alt_hypothesis: Optional[
+             Literal["greaterAbs", "lessAbs", "greater", "less"]
+         ] = None,
+     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+         num_genes = mu.shape[1]
+         with parallel_backend(self._backend, inner_max_num_threads=1):
+             res = Parallel(
+                 n_jobs=self.n_cpus,
+                 verbose=self._joblib_verbosity,
+                 batch_size=self._batch_size,
+             )(
+                 delayed(wald_test)(
+                     design_matrix=design_matrix,
+                     disp=disp[i],
+                     lfc=lfc[i],
+                     mu=mu[:, i],
+                     ridge_factor=ridge_factor,
+                     contrast=contrast,
+                     lfc_null=lfc_null,  # no log2-to-natural-log conversion is applied here
+                     alt_hypothesis=alt_hypothesis,
+                 )
+                 for i in range(num_genes)
+             )
+         res = zip(*res)
+         pvals, stats, se = (np.array(m) for m in res)
+
+         return pvals, stats, se
+
+
+     def lfc_shrink_nbinom_glm(  # noqa: D102
+         self,
+         design_matrix: np.ndarray,
+         counts: np.ndarray,
+         cnv: np.ndarray,
+         size: np.ndarray,
+         offset: np.ndarray,
+         prior_no_shrink_scale: float,
+         prior_scale: float,
+         optimizer: str,
+         shrink_index: int,
+     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+         with parallel_backend(self._backend, inner_max_num_threads=1):
+             num_genes = counts.shape[1]
+             res = Parallel(
+                 n_jobs=self.n_cpus,
+                 verbose=self._joblib_verbosity,
+                 batch_size=self._batch_size,
+             )(
+                 delayed(utils_CNaware.nbinomGLM)(
+                     design_matrix=design_matrix,
+                     counts=counts[:, i],
+                     cnv=cnv[:, i],
+                     size=size[i],
+                     offset=offset,
+                     prior_no_shrink_scale=prior_no_shrink_scale,
+                     prior_scale=prior_scale,
+                     optimizer=optimizer,
+                     shrink_index=shrink_index,
+                 )
+                 for i in range(num_genes)
+             )
+         res = zip(*res)
+         lfcs, inv_hessians, l_bfgs_b_converged_ = (np.array(m) for m in res)
+         return lfcs, inv_hessians, l_bfgs_b_converged_
+
+
+
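
Usage sketch. The snippet below shows how the class in this diff is constructed and called; it is a sketch under assumptions, not part of the package. The module path deconveil.inference_CNaware is a guess (the file path is not visible in this diff), and the array orientation (samples in rows, genes in columns) is read off the per-gene indexing counts[:, i] in the code above.

    import numpy as np
    from deconveil.inference_CNaware import DefInference  # assumed module path

    rng = np.random.default_rng(0)
    n_samples, n_genes = 10, 100

    counts = rng.poisson(50.0, size=(n_samples, n_genes)).astype(float)
    size_factors = np.ones(n_samples)
    design_matrix = np.column_stack(
        [np.ones(n_samples), rng.integers(0, 2, n_samples)]
    )

    # Configure joblib parallelization: 4 workers, default batch size.
    inference = DefInference(n_cpus=4, batch_size=128, backend="loky")

    # Per-gene linear fit of mu, parallelized over genes by joblib;
    # the stacked per-gene results are transposed back to (n_samples, n_genes).
    mu_hat = inference.lin_reg_mu(
        counts=counts,
        size_factors=size_factors,
        design_matrix=design_matrix,
        min_mu=0.5,
    )
    assert mu_hat.shape == (n_samples, n_genes)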
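A note on the math in dispersion_trend_gamma_glm: loss is the gamma negative log-likelihood of the trend values, up to additive constants, and grad is its exact gradient, so L-BFGS-B runs with analytic derivatives rather than finite differences. With mu_i = x_i^T c, where x_i is a row of covariates_fit and y_i an entry of targets_fit:

    \ell(c) = \frac{1}{n} \sum_{i=1}^{n} \left( \frac{y_i}{\mu_i} + \log \mu_i \right),
    \qquad
    \nabla_c \, \ell(c) = -\frac{1}{n} \sum_{i=1}^{n} \frac{y_i/\mu_i - 1}{\mu_i} \, x_i

which is exactly what grad computes (including the leading minus sign and the nan-aware mean). The positivity bounds on both coefficients keep mu_i strictly positive for nonnegative covariates, so the logarithm in the loss stays defined throughout the optimization.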