scdesigner 0.0.5__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdesigner/base/__init__.py +8 -0
- scdesigner/base/copula.py +416 -0
- scdesigner/base/marginal.py +391 -0
- scdesigner/base/simulator.py +59 -0
- scdesigner/copulas/__init__.py +8 -0
- scdesigner/copulas/standard_copula.py +645 -0
- scdesigner/datasets/__init__.py +5 -0
- scdesigner/datasets/pancreas.py +39 -0
- scdesigner/distributions/__init__.py +19 -0
- scdesigner/{minimal → distributions}/bernoulli.py +42 -14
- scdesigner/distributions/gaussian.py +114 -0
- scdesigner/distributions/negbin.py +121 -0
- scdesigner/distributions/negbin_irls.py +72 -0
- scdesigner/distributions/negbin_irls_funs.py +456 -0
- scdesigner/distributions/poisson.py +88 -0
- scdesigner/{minimal → distributions}/zero_inflated_negbin.py +39 -10
- scdesigner/distributions/zero_inflated_poisson.py +103 -0
- scdesigner/simulators/__init__.py +24 -28
- scdesigner/simulators/composite.py +239 -0
- scdesigner/simulators/positive_nonnegative_matrix_factorization.py +477 -0
- scdesigner/simulators/scd3.py +486 -0
- scdesigner/transform/__init__.py +8 -6
- scdesigner/{minimal → transform}/transform.py +1 -1
- scdesigner/{minimal → utils}/kwargs.py +4 -1
- {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/METADATA +1 -1
- scdesigner-0.0.10.dist-info/RECORD +28 -0
- {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/WHEEL +1 -1
- scdesigner/data/__init__.py +0 -16
- scdesigner/data/formula.py +0 -137
- scdesigner/data/group.py +0 -123
- scdesigner/data/sparse.py +0 -39
- scdesigner/diagnose/__init__.py +0 -65
- scdesigner/diagnose/aic_bic.py +0 -119
- scdesigner/diagnose/plot.py +0 -242
- scdesigner/estimators/__init__.py +0 -32
- scdesigner/estimators/bernoulli.py +0 -85
- scdesigner/estimators/gaussian.py +0 -121
- scdesigner/estimators/gaussian_copula_factory.py +0 -367
- scdesigner/estimators/glm_factory.py +0 -75
- scdesigner/estimators/negbin.py +0 -153
- scdesigner/estimators/pnmf.py +0 -160
- scdesigner/estimators/poisson.py +0 -124
- scdesigner/estimators/zero_inflated_negbin.py +0 -195
- scdesigner/estimators/zero_inflated_poisson.py +0 -85
- scdesigner/format/__init__.py +0 -4
- scdesigner/format/format.py +0 -20
- scdesigner/format/print.py +0 -30
- scdesigner/minimal/__init__.py +0 -17
- scdesigner/minimal/composite.py +0 -119
- scdesigner/minimal/copula.py +0 -205
- scdesigner/minimal/formula.py +0 -23
- scdesigner/minimal/gaussian.py +0 -65
- scdesigner/minimal/loader.py +0 -211
- scdesigner/minimal/marginal.py +0 -154
- scdesigner/minimal/negbin.py +0 -73
- scdesigner/minimal/positive_nonnegative_matrix_factorization.py +0 -231
- scdesigner/minimal/scd3.py +0 -96
- scdesigner/minimal/scd3_instances.py +0 -50
- scdesigner/minimal/simulator.py +0 -25
- scdesigner/minimal/standard_copula.py +0 -383
- scdesigner/predictors/__init__.py +0 -15
- scdesigner/predictors/bernoulli.py +0 -9
- scdesigner/predictors/gaussian.py +0 -16
- scdesigner/predictors/negbin.py +0 -17
- scdesigner/predictors/poisson.py +0 -12
- scdesigner/predictors/zero_inflated_negbin.py +0 -18
- scdesigner/predictors/zero_inflated_poisson.py +0 -18
- scdesigner/samplers/__init__.py +0 -23
- scdesigner/samplers/bernoulli.py +0 -27
- scdesigner/samplers/gaussian.py +0 -25
- scdesigner/samplers/glm_factory.py +0 -103
- scdesigner/samplers/negbin.py +0 -25
- scdesigner/samplers/poisson.py +0 -25
- scdesigner/samplers/zero_inflated_negbin.py +0 -40
- scdesigner/samplers/zero_inflated_poisson.py +0 -16
- scdesigner/simulators/composite_regressor.py +0 -72
- scdesigner/simulators/glm_simulator.py +0 -167
- scdesigner/simulators/pnmf_regression.py +0 -61
- scdesigner/transform/amplify.py +0 -14
- scdesigner/transform/mask.py +0 -33
- scdesigner/transform/nullify.py +0 -25
- scdesigner/transform/split.py +0 -23
- scdesigner/transform/substitute.py +0 -14
- scdesigner-0.0.5.dist-info/RECORD +0 -66
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
from ..base.copula import Copula
|
|
2
|
+
from ..data.loader import obs_loader, adata_loader
|
|
3
|
+
from ..base.marginal import Marginal
|
|
4
|
+
from ..base.simulator import Simulator
|
|
5
|
+
from anndata import AnnData
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
import torch
|
|
8
|
+
import numpy as np
|
|
9
|
+
from ..distributions import (
|
|
10
|
+
NegBin,
|
|
11
|
+
NegBinIRLS,
|
|
12
|
+
ZeroInflatedNegBin,
|
|
13
|
+
Gaussian,
|
|
14
|
+
Poisson,
|
|
15
|
+
ZeroInflatedPoisson,
|
|
16
|
+
Bernoulli,
|
|
17
|
+
)
|
|
18
|
+
from ..copulas import StandardCopula
|
|
19
|
+
from typing import Optional
|
|
20
|
+
from abc import ABC
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SCD3Simulator(Simulator, ABC):
    """High-level simulation wrapper combining a marginal model and a copula.

    The :class:`SCD3Simulator` class coordinates fitting of a marginal model
    (e.g. negative binomial, zero-inflated negative binomial) together with a
    dependence structure specified by a copula (e.g. :class:`StandardCopula`).
    Subclasses provide concrete combinations of marginal and copula models
    tailored to common use cases.

    Parameters
    ----------
    marginal : Marginal
        Fitted or unfitted marginal model describing the distribution of each
        feature (e.g. gene) conditional on covariates.
    copula : Copula
        Copula object that captures dependence between features and shares
        the same covariate structure as the marginal model.

    Attributes
    ----------
    marginal : Marginal
        The marginal model instance used for fitting and simulation.
    copula : Copula
        The copula instance used to model dependence between features.
    template : AnnData or None
        Reference dataset used to define the observed covariate space and
        feature set for simulation. Set during :meth:`fit`.
    parameters : dict or None
        Dictionary containing fitted parameters for both the marginal and
        copula components after :meth:`fit` has been called.

    Examples
    --------
    The abstract :class:`SCD3Simulator` is not used directly. Instead, use one
    of its concrete subclasses, e.g. :class:`NegBinCopula`::

        >>> import scanpy as sc
        >>> from scdesigner.simulators.scd3 import NegBinCopula
        >>>
        >>> adata = sc.datasets.pbmc3k()[:, :100].copy()
        >>>
        >>> # Intercept-only formulas for the mean, dispersion and copula
        >>> sim = NegBinCopula(mean_formula="~ 1", dispersion_formula="~ 1",
        ...                    copula_formula="~ 1")
        >>> sim.fit(adata, batch_size=256, max_epochs=10)  # doctest: +SKIP
        >>>
        >>> # Generate synthetic data with the same obs covariates
        >>> synthetic = sim.sample(batch_size=512)  # doctest: +SKIP
        >>> synthetic.X.shape == adata.shape  # doctest: +SKIP
        True
        >>>
        >>> # Compute model complexity via AIC/BIC of the copula component
        >>> metrics = sim.complexity()  # doctest: +SKIP
        >>> sorted(metrics.keys())  # doctest: +SKIP
        ['aic', 'bic']
    """

    def __init__(self, marginal: Marginal, copula: Copula):
        self.marginal = marginal
        self.copula = copula
        # Both are filled in by fit(); None means "not fitted yet".
        self.template = None
        self.parameters = None

    def _resolve_obs(self, obs, caller: str):
        """Return ``obs``, falling back to the template's ``obs`` table.

        Raises
        ------
        RuntimeError
            If ``obs`` is ``None`` and no template has been stored yet,
            i.e. :meth:`fit` has not been called.
        """
        if obs is not None:
            return obs
        if self.template is None:
            raise RuntimeError(
                f"{type(self).__name__}.{caller}() was called without `obs` "
                "before fit(); call fit() first or pass `obs` explicitly."
            )
        return self.template.obs

    def fit(self, adata: AnnData, **kwargs):
        """Fit marginal and copula components to an AnnData object.

        Parameters
        ----------
        adata : AnnData
            Input dataset with cells in rows and features (e.g. genes) in
            columns. Both the marginal and copula components are fitted to
            this data.
        **kwargs
            Additional keyword arguments forwarded to the marginal and copula
            fit routines (e.g. ``batch_size``, optimization settings).

        Notes
        -----
        This method sets the :attr:`template` attribute to ``adata`` and
        stores fitted parameters in :attr:`parameters`.
        """
        self.template = adata

        # marginal model: data loader, optimizer, then the actual fit
        self.marginal.setup_data(adata, **kwargs)
        self.marginal.setup_optimizer(**kwargs)
        self.marginal.fit(**kwargs)

        # copula model: fitted on the output of the marginal's `uniformize`
        self.copula.setup_data(adata, self.marginal.formula, **kwargs)
        self.copula.fit(self.marginal.uniformize, **kwargs)

        self.parameters = {
            "marginal": self.marginal.parameters,
            "copula": self.copula.parameters,
        }

    def predict(self, obs=None, batch_size: int = 1000, **kwargs):
        """Predict marginal parameters for given covariates.

        Parameters
        ----------
        obs : pandas.DataFrame or None, optional
            Observation-level covariate table. If ``None``, use
            ``self.template.obs`` from the dataset provided to :meth:`fit`.
        batch_size : int, optional
            Number of observations per mini-batch used during prediction.
        **kwargs
            Additional keyword arguments passed to :func:`obs_loader`.

        Returns
        -------
        dict
            Dictionary mapping parameter names (e.g. ``"mean"``,
            ``"dispersion"``) to NumPy arrays of shape ``(n_cells, n_genes)``
            containing the predicted marginal parameters.

        Raises
        ------
        RuntimeError
            If ``obs`` is ``None`` and :meth:`fit` has not been called.
        """
        obs = self._resolve_obs(obs, "predict")
        loader = obs_loader(obs, self.marginal.formula, batch_size=batch_size, **kwargs)

        # one dictionary of parameter tensors per mini-batch
        batch_parameters = [self.marginal.predict(x_dict) for _, x_dict in loader]

        # concatenate the batches parameter by parameter
        keys = list(batch_parameters[0].keys())
        return {
            k: torch.cat([d[k] for d in batch_parameters]).detach().cpu().numpy()
            for k in keys
        }

    def sample(self, obs=None, batch_size: int = 1000, **kwargs):
        """Generate synthetic observations from the fitted model.

        Parameters
        ----------
        obs : pandas.DataFrame or None, optional
            Observation-level covariate table defining the covariate space
            for simulation. If ``None``, use ``self.template.obs``.
        batch_size : int, optional
            Number of observations per mini-batch used during sampling.
        **kwargs
            Additional keyword arguments passed to :func:`obs_loader`.

        Returns
        -------
        AnnData
            An :class:`AnnData` object with simulated counts in ``.X`` and
            ``obs`` equal to the provided covariate table.

        Raises
        ------
        RuntimeError
            If ``obs`` is ``None`` and :meth:`fit` has not been called.
        """
        obs = self._resolve_obs(obs, "sample")
        loader = obs_loader(
            obs,
            self.copula.formula | self.marginal.formula,
            batch_size=batch_size,
            **kwargs,
        )

        # per batch: draw pseudo-observations from the copula, then map them
        # through the marginal's `invert`
        samples = []
        for _, x_dict in loader:
            u = torch.from_numpy(self.copula.pseudo_obs(x_dict))
            samples.append(self.marginal.invert(u, x_dict))
        samples = torch.cat(samples).detach().cpu().numpy()
        return AnnData(X=samples, obs=obs)

    def complexity(self, adata: AnnData = None, **kwargs):
        """Compute model complexity metrics (AIC, BIC) for the copula component.

        Parameters
        ----------
        adata : AnnData or None, optional
            Dataset to evaluate the copula log-likelihood on. If ``None``,
            use the template dataset stored during :meth:`fit`.
        **kwargs
            Additional keyword arguments passed to :func:`adata_loader`.

        Returns
        -------
        dict
            Dictionary with keys ``"aic"`` and ``"bic"`` computed from the
            copula log-likelihood and :meth:`copula.num_params`.

        Raises
        ------
        RuntimeError
            If ``adata`` is ``None`` and :meth:`fit` has not been called.
        """
        if adata is None:
            if self.template is None:
                raise RuntimeError(
                    f"{type(self).__name__}.complexity() was called without "
                    "`adata` before fit(); call fit() first or pass `adata`."
                )
            adata = self.template

        N, ll = 0, 0
        loader = adata_loader(
            adata, self.marginal.formula | self.copula.formula, **kwargs
        )
        for batch in tqdm(loader, desc="Computing log-likelihood..."):
            ll += self.copula.likelihood(self.marginal.uniformize, batch).sum()
            N += len(batch[0])

        num_params = self.copula.num_params()
        return {
            "aic": -2 * ll + 2 * num_params,
            "bic": -2 * ll + np.log(N) * num_params,
        }
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
################################################################################
|
|
227
|
+
## SCD3 instances
|
|
228
|
+
################################################################################
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class NegBinCopula(SCD3Simulator):
    """Negative binomial marginals coupled through a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Formula for the mean of the negative binomial marginal
        (e.g. ``"~ 1"`` or ``"~ group"``). If ``None``, a default
        constant-mean formula is used.
    dispersion_formula : str or None, optional
        Formula for the dispersion of the negative binomial marginal.
        If ``None``, a default constant-dispersion formula is used.
    copula_formula : str or None, optional
        Formula describing how the copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`NegBin`
    :class:`StandardCopula`
    """

    def __init__(
        self,
        mean_formula: Optional[str] = None,
        dispersion_formula: Optional[str] = None,
        copula_formula: Optional[str] = None,
    ) -> None:
        # one formula per marginal parameter, keyed by parameter name
        formulas = {"mean": mean_formula, "dispersion": dispersion_formula}
        super().__init__(NegBin(formulas), StandardCopula(copula_formula))
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class ZeroInflatedNegBinCopula(SCD3Simulator):
    """Simulator using zero-inflated negative binomial marginals with
    a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Model formula for the mean parameter of the zero-inflated
        negative binomial marginal (e.g. ``"~ 1"`` or ``"~ group"``).
        If ``None``, a default constant-mean formula is used.
    dispersion_formula : str or None, optional
        Model formula for the dispersion parameter of the zero-inflated
        negative binomial marginal. If ``None``, a default
        constant-dispersion formula is used.
    zero_inflation_formula : str or None, optional
        Model formula for the zero-inflation parameter of the zero-inflated
        negative binomial marginal. If ``None``, a default
        constant-zero-inflation formula is used.
    copula_formula : str or None, optional
        Copula formula describing how copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`ZeroInflatedNegBin`
    :class:`StandardCopula`
    """

    def __init__(
        self,
        mean_formula: Optional[str] = None,
        dispersion_formula: Optional[str] = None,
        zero_inflation_formula: Optional[str] = None,
        copula_formula: Optional[str] = None,
    ) -> None:
        marginal = ZeroInflatedNegBin(
            {
                "mean": mean_formula,
                "dispersion": dispersion_formula,
                # Keyed by parameter name like the other entries (and like
                # ZeroInflatedPoissonCopula); the previous key
                # "zero_inflation_formula" did not name a parameter.
                # NOTE(review): confirm against the keys ZeroInflatedNegBin accepts.
                "zero_inflation": zero_inflation_formula,
            }
        )
        covariance = StandardCopula(copula_formula)
        super().__init__(marginal, covariance)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class BernoulliCopula(SCD3Simulator):
    """Bernoulli marginals coupled through a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Formula for the mean of the Bernoulli marginal (e.g. ``"~ 1"`` or
        ``"~ group"``). If ``None``, a default constant-mean formula is
        used.
    copula_formula : str or None, optional
        Formula describing how the copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`Bernoulli`
    :class:`StandardCopula`
    """

    def __init__(
        self, mean_formula: Optional[str] = None, copula_formula: Optional[str] = None
    ) -> None:
        # the Bernoulli marginal has a single parameter, its mean
        formulas = {"mean": mean_formula}
        super().__init__(Bernoulli(formulas), StandardCopula(copula_formula))
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
class GaussianCopula(SCD3Simulator):
    """Gaussian marginals coupled through a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Formula for the mean of the Gaussian marginal (e.g. ``"~ 1"`` or
        ``"~ group"``). If ``None``, a default constant-mean formula is
        used.
    sdev_formula : str or None, optional
        Formula for the standard deviation of the Gaussian marginal.
        If ``None``, a default constant-standard deviation formula is used.
    copula_formula : str or None, optional
        Formula describing how the copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`Gaussian`
    :class:`StandardCopula`
    """

    def __init__(
        self,
        mean_formula: Optional[str] = None,
        sdev_formula: Optional[str] = None,
        copula_formula: Optional[str] = None,
    ) -> None:
        # one formula per marginal parameter, keyed by parameter name
        formulas = {"mean": mean_formula, "sdev": sdev_formula}
        super().__init__(Gaussian(formulas), StandardCopula(copula_formula))
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
class PoissonCopula(SCD3Simulator):
    """Poisson marginals coupled through a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Formula for the mean of the Poisson marginal (e.g. ``"~ 1"`` or
        ``"~ group"``). If ``None``, a default constant-mean formula is
        used.
    copula_formula : str or None, optional
        Formula describing how the copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`Poisson`
    :class:`StandardCopula`
    """

    def __init__(
        self, mean_formula: Optional[str] = None, copula_formula: Optional[str] = None
    ) -> None:
        # the Poisson marginal has a single parameter, its mean
        formulas = {"mean": mean_formula}
        super().__init__(Poisson(formulas), StandardCopula(copula_formula))
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class ZeroInflatedPoissonCopula(SCD3Simulator):
    """Zero-inflated Poisson marginals coupled through a Gaussian copula.

    Parameters
    ----------
    mean_formula : str or None, optional
        Formula for the mean of the zero-inflated Poisson marginal
        (e.g. ``"~ 1"`` or ``"~ group"``). If ``None``, a default
        constant-mean formula is used.
    zero_inflation_formula : str or None, optional
        Formula for the zero-inflation parameter of the zero-inflated
        Poisson marginal. If ``None``, a default constant-zero-inflation
        formula is used.
    copula_formula : str or None, optional
        Formula describing how the copula depends on experimental or
        biological conditions (e.g. ``"~ group"``). If ``None``, a default
        intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`ZeroInflatedPoisson`
    :class:`StandardCopula`
    """

    def __init__(
        self,
        mean_formula: Optional[str] = None,
        zero_inflation_formula: Optional[str] = None,
        copula_formula: Optional[str] = None,
    ) -> None:
        # one formula per marginal parameter, keyed by parameter name
        formulas = {"mean": mean_formula, "zero_inflation": zero_inflation_formula}
        super().__init__(
            ZeroInflatedPoisson(formulas), StandardCopula(copula_formula)
        )
|
|
443
|
+
|
|
444
|
+
class NegBinIRLSCopula(SCD3Simulator):
    """Simulator using :class:`NegBinIRLS` marginals with a Gaussian copula.

    Same constructor arguments as :class:`NegBinCopula`, but the marginal is
    a :class:`NegBinIRLS` instance, and :meth:`fit`, :meth:`sample` and
    :meth:`predict` default to ``batch_size=8224`` and ``device="cpu"``.

    Parameters
    ----------
    mean_formula : str or None, optional
        Model formula for the mean parameter of the negative binomial
        marginal (e.g. ``"~ 1"`` or ``"~ group"``). If ``None``, a
        default constant-mean formula is used.
    dispersion_formula : str or None, optional
        Model formula for the dispersion parameter of the negative
        binomial marginal. If ``None``, a default constant-dispersion
        formula is used.
    copula_formula : str or None, optional
        Copula formula describing how copula depends on experimental
        or biological conditions (e.g. ``"~ group"``). If ``None``,
        a default intercept-only formula is used.

    See Also
    --------
    :class:`SCD3Simulator`
    :class:`NegBinIRLS`
    :class:`StandardCopula`
    """

    def __init__(
        self,
        mean_formula: Optional[str] = None,
        dispersion_formula: Optional[str] = None,
        copula_formula: Optional[str] = None,
    ) -> None:
        marginal = NegBinIRLS({"mean": mean_formula, "dispersion": dispersion_formula})
        covariance = StandardCopula(copula_formula)
        super().__init__(marginal, covariance)

    def fit(self, adata: AnnData, batch_size: int = 8224, device="cpu", **kwargs):
        """Fit the model, forwarding ``batch_size`` and ``device`` to the base ``fit``."""
        super().fit(adata, batch_size=batch_size, device=device, **kwargs)

    def sample(self, obs=None, batch_size: int = 8224, **kwargs):
        """Sample as in the base class, defaulting the loader ``device`` to ``"cpu"``.

        A ``device`` passed by the caller is honoured. Previously it clashed
        with the hard-coded ``device="cpu"`` and raised ``TypeError``.
        """
        kwargs.setdefault("device", "cpu")
        return super().sample(obs, batch_size, **kwargs)

    def predict(self, obs=None, batch_size: int = 8224, **kwargs):
        """Predict as in the base class, defaulting the loader ``device`` to ``"cpu"``.

        A ``device`` passed by the caller is honoured. Previously it clashed
        with the hard-coded ``device="cpu"`` and raised ``TypeError``.
        """
        kwargs.setdefault("device", "cpu")
        return super().predict(obs, batch_size, **kwargs)
|
scdesigner/transform/__init__.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .amplify import amplify
|
|
3
|
-
from .substitute import substitute
|
|
4
|
-
from .split import split_glm
|
|
5
|
-
from .mask import data_frame_mask
|
|
1
|
+
from .transform import nullify, amplify, decorrelate, correlate, replace_param
|
|
6
2
|
|
|
7
|
-
__all__ = [
|
|
3
|
+
__all__ = [
|
|
4
|
+
"nullify",
|
|
5
|
+
"amplify",
|
|
6
|
+
"decorrelate",
|
|
7
|
+
"correlate",
|
|
8
|
+
"replace_param",
|
|
9
|
+
]
|
|
@@ -11,7 +11,7 @@ DEFAULT_ALLOWED_KWARGS = {
|
|
|
11
11
|
'log_every_n_steps', 'accumulate_grad_batches'
|
|
12
12
|
},
|
|
13
13
|
'data': {
|
|
14
|
-
'chunk_size', 'batch_size', 'shuffle', 'num_workers'
|
|
14
|
+
'chunk_size', 'batch_size', 'shuffle', 'num_workers', 'device'
|
|
15
15
|
},
|
|
16
16
|
'optimizer': {
|
|
17
17
|
'lr', 'learning_rate', 'momentum', 'weight_decay', 'eps', 'betas',
|
|
@@ -20,5 +20,8 @@ DEFAULT_ALLOWED_KWARGS = {
|
|
|
20
20
|
'verbose', 'patience', 'threshold', 'cooldown',
|
|
21
21
|
'optimizer_class', 'optimizer', 'scheduler_class', 'scheduler',
|
|
22
22
|
'monitor', 'interval', 'frequency'
|
|
23
|
+
},
|
|
24
|
+
'initialize': {
|
|
25
|
+
'tol', 'clamp', 'max_iter'
|
|
23
26
|
}
|
|
24
27
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scdesigner
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.0.10
|
|
4
4
|
Summary: Interactive simulation for rigorous and transparent multi-omics analysis.
|
|
5
5
|
Project-URL: Homepage, https://github.com/krisrs1128/scDesigner/
|
|
6
6
|
Project-URL: Issues, https://github.com/krisrs1128/scDesigner/Issues/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
scdesigner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
scdesigner/base/__init__.py,sha256=At6IcDGIUZmFzYaquqbPtrNTEszYUDjH3yMP6Z6gHqE,140
|
|
3
|
+
scdesigner/base/copula.py,sha256=M4PlrMKqztrjlfqhz3zpNV7HKMhOqJvZw6XEBFsixGs,16335
|
|
4
|
+
scdesigner/base/marginal.py,sha256=zMeFihI1G2GXqU8wnKym9BmbxC3Dv0e9xB-rXuyAWHI,17230
|
|
5
|
+
scdesigner/base/simulator.py,sha256=ImJoz-kb5s4Y_4LdpiKENIR7SRbYy4-sKtuxRF9lFAA,1989
|
|
6
|
+
scdesigner/copulas/__init__.py,sha256=wYUnNDtn5Yga16oa709rW34IvFx_A5RiNf76EywernU,137
|
|
7
|
+
scdesigner/copulas/standard_copula.py,sha256=TuXFOczyZLYR38n41TqG1lYmWw3oJKe4UcUQfoFaw1I,23930
|
|
8
|
+
scdesigner/datasets/__init__.py,sha256=ZPMHA8hma-HVhVlIEkRy7AFNO7DTe_W1GmQ4ME4aMnw,90
|
|
9
|
+
scdesigner/datasets/pancreas.py,sha256=Ds5MgXFRqmDQE4bOfGm-Wza2yiSlFMSIp45-7oP7bpQ,1107
|
|
10
|
+
scdesigner/distributions/__init__.py,sha256=WnLKtxD99flKe6zTgtrV0WdFPbPixRrmVDn8lN2ALuM,458
|
|
11
|
+
scdesigner/distributions/bernoulli.py,sha256=CueT4E7aUXRagCVQujBpMyJ9dksZC144wPtDE7jXej4,3273
|
|
12
|
+
scdesigner/distributions/gaussian.py,sha256=zKy9BShPRqgpXr_32fX8Bwd0lLIdJmdMzINO3BbhMbU,4197
|
|
13
|
+
scdesigner/distributions/negbin.py,sha256=qe_Br_kHd4A8ufw65Xpaa9S0YFXW9buyz81fv4cxD6E,4511
|
|
14
|
+
scdesigner/distributions/negbin_irls.py,sha256=-2oI9vB3g1y2eFT9C6sRn3oCnPAhfQm5L-X5Y81EQYo,3078
|
|
15
|
+
scdesigner/distributions/negbin_irls_funs.py,sha256=VfrSUAW7EPpnohtE6QAafpvyNJp03PG40cnc-B9Qzas,15016
|
|
16
|
+
scdesigner/distributions/poisson.py,sha256=cjREg6oXyVPMfYc7vL1thhk4HbmQqf80JjTEe0cs9aU,3145
|
|
17
|
+
scdesigner/distributions/zero_inflated_negbin.py,sha256=eVeRgHQb8OilenGekkA_hX4JJcfOMS34PtY68n2YgeQ,4409
|
|
18
|
+
scdesigner/distributions/zero_inflated_poisson.py,sha256=AH4SCbYP7swqsLlNcNo7A-KponmjdOh-8xybf_g6YgE,3943
|
|
19
|
+
scdesigner/simulators/__init__.py,sha256=OnkPOCXyXH9zfn8Q2vTrUwMXtMwgHwBn12f7j_sMfRM,587
|
|
20
|
+
scdesigner/simulators/composite.py,sha256=1H69xBQNMBZgWYFyd3RDVrzqh6i_5QrnaxXsCR56v-o,9149
|
|
21
|
+
scdesigner/simulators/positive_nonnegative_matrix_factorization.py,sha256=KnPLlN5MgNdHRxehA9G9g7rxT_ZOt6eiiJZRN0lkzyQ,15505
|
|
22
|
+
scdesigner/simulators/scd3.py,sha256=x1gHFnuDxDFBvx-BVzmJwGrNUG22G32d_ng8PnfviQQ,17739
|
|
23
|
+
scdesigner/transform/__init__.py,sha256=F56wBgxPXRv0LYF91FL--R3qfFIsBLe2C9aJyiy7zD8,180
|
|
24
|
+
scdesigner/transform/transform.py,sha256=xhq1fdfCfQGG5bCkSF_bQUCrf9FxNqd6IOMw-PCqOIY,5465
|
|
25
|
+
scdesigner/utils/kwargs.py,sha256=7JAY8Bd47CrMnmlmGoPRViCvvg1DgyjftFPm5PSa36A,1051
|
|
26
|
+
scdesigner-0.0.10.dist-info/METADATA,sha256=5zHm2anjobQkgDQn12MkgNUbQbL306LcQc9iC6j7v_A,742
|
|
27
|
+
scdesigner-0.0.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
28
|
+
scdesigner-0.0.10.dist-info/RECORD,,
|
scdesigner/data/__init__.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from .formula import FormulaViewDataset, formula_loader, multiple_formula_loader, standardize_formula
|
|
2
|
-
from .group import FormulaGroupViewDataset, formula_group_loader, stack_collate, multiple_formula_group_loader
|
|
3
|
-
from .sparse import SparseMatrixDataset, SparseMatrixLoader
|
|
4
|
-
|
|
5
|
-
__all__ = [
|
|
6
|
-
"FormulaViewDataset",
|
|
7
|
-
"SparseMatrixDataset",
|
|
8
|
-
"SparseMatrixLoader",
|
|
9
|
-
"FormulaGroupViewDataset",
|
|
10
|
-
"formula_loader",
|
|
11
|
-
"formula_group_loader",
|
|
12
|
-
"stack_collate",
|
|
13
|
-
"multiple_formula_loader",
|
|
14
|
-
"multiple_formula_group_loader",
|
|
15
|
-
"standardize_formula"
|
|
16
|
-
]
|