scdesigner 0.0.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of scdesigner might be problematic.
- scdesigner/__init__.py +0 -0
- scdesigner/data/__init__.py +16 -0
- scdesigner/data/formula.py +137 -0
- scdesigner/data/group.py +123 -0
- scdesigner/data/sparse.py +39 -0
- scdesigner/diagnose/__init__.py +65 -0
- scdesigner/diagnose/aic_bic.py +119 -0
- scdesigner/diagnose/plot.py +242 -0
- scdesigner/estimators/__init__.py +27 -0
- scdesigner/estimators/bernoulli.py +85 -0
- scdesigner/estimators/gaussian.py +121 -0
- scdesigner/estimators/gaussian_copula_factory.py +152 -0
- scdesigner/estimators/glm_factory.py +75 -0
- scdesigner/estimators/negbin.py +129 -0
- scdesigner/estimators/pnmf.py +160 -0
- scdesigner/estimators/poisson.py +100 -0
- scdesigner/estimators/zero_inflated_negbin.py +195 -0
- scdesigner/estimators/zero_inflated_poisson.py +85 -0
- scdesigner/format/__init__.py +4 -0
- scdesigner/format/format.py +20 -0
- scdesigner/format/print.py +30 -0
- scdesigner/minimal/__init__.py +17 -0
- scdesigner/minimal/bernoulli.py +61 -0
- scdesigner/minimal/composite.py +119 -0
- scdesigner/minimal/copula.py +33 -0
- scdesigner/minimal/formula.py +23 -0
- scdesigner/minimal/gaussian.py +65 -0
- scdesigner/minimal/kwargs.py +24 -0
- scdesigner/minimal/loader.py +166 -0
- scdesigner/minimal/marginal.py +140 -0
- scdesigner/minimal/negbin.py +73 -0
- scdesigner/minimal/positive_nonnegative_matrix_factorization.py +231 -0
- scdesigner/minimal/scd3.py +95 -0
- scdesigner/minimal/scd3_instances.py +50 -0
- scdesigner/minimal/simulator.py +25 -0
- scdesigner/minimal/standard_covariance.py +124 -0
- scdesigner/minimal/transform.py +145 -0
- scdesigner/minimal/zero_inflated_negbin.py +86 -0
- scdesigner/predictors/__init__.py +15 -0
- scdesigner/predictors/bernoulli.py +9 -0
- scdesigner/predictors/gaussian.py +16 -0
- scdesigner/predictors/negbin.py +17 -0
- scdesigner/predictors/poisson.py +12 -0
- scdesigner/predictors/zero_inflated_negbin.py +18 -0
- scdesigner/predictors/zero_inflated_poisson.py +18 -0
- scdesigner/samplers/__init__.py +23 -0
- scdesigner/samplers/bernoulli.py +27 -0
- scdesigner/samplers/gaussian.py +25 -0
- scdesigner/samplers/glm_factory.py +41 -0
- scdesigner/samplers/negbin.py +25 -0
- scdesigner/samplers/poisson.py +25 -0
- scdesigner/samplers/zero_inflated_negbin.py +40 -0
- scdesigner/samplers/zero_inflated_poisson.py +16 -0
- scdesigner/simulators/__init__.py +31 -0
- scdesigner/simulators/composite_regressor.py +72 -0
- scdesigner/simulators/glm_simulator.py +167 -0
- scdesigner/simulators/pnmf_regression.py +61 -0
- scdesigner/transform/__init__.py +7 -0
- scdesigner/transform/amplify.py +14 -0
- scdesigner/transform/mask.py +33 -0
- scdesigner/transform/nullify.py +25 -0
- scdesigner/transform/split.py +23 -0
- scdesigner/transform/substitute.py +14 -0
- scdesigner-0.0.1.dist-info/METADATA +23 -0
- scdesigner-0.0.1.dist-info/RECORD +66 -0
- scdesigner-0.0.1.dist-info/WHEEL +4 -0
scdesigner/minimal/transform.py
@@ -0,0 +1,145 @@
from typing import Union, Sequence
import numpy as np
import re
import torch
import copy


def nullify(sim, row_pattern: str, col_pattern: str, param: str):
    """Nullify marginal parameters

    Zero out entries matching (row_pattern, col_pattern) for the marginal `param`.
    """
    sim = copy.deepcopy(sim)
    df = sim.parameters["marginal"][param]
    matches = data_frame_mask(df, row_pattern, col_pattern)
    mask = (~matches).astype(float)
    mat = sim.marginal.predict.coefs[param].detach().cpu().numpy() * mask
    _update_marginal_param(sim, param, mat)
    return sim


def amplify(sim, factor: float, row_pattern: str, col_pattern: str, param: str):
    """Multiply selected marginal entries by factor."""
    sim = copy.deepcopy(sim)
    df = sim.parameters["marginal"][param]
    matches = data_frame_mask(df, row_pattern, col_pattern).astype(float)
    mask = factor * matches + np.ones_like(matches)
    mat = sim.marginal.predict.coefs[param].detach().cpu().numpy() * mask
    _update_marginal_param(sim, param, mat)
    return sim


def decorrelate(sim, row_pattern: str, col_pattern: str, group: Union[str, None] = None):
    """Zero out selected off-diagonal entries of a covariance."""
    sim = copy.deepcopy(sim)

    def _apply_to_df(df):
        m1 = data_frame_mask(df, ".", col_pattern)
        m2 = data_frame_mask(df, row_pattern, ".")
        mask = (m1 | m2)
        np.fill_diagonal(mask, False)
        df.values[mask] = 0

    cov = sim.parameters["copula"]
    _apply_to_groups(cov, group, _apply_to_df)
    return sim


def correlate(sim, factor: float, row_pattern: str, col_pattern: str, group: Union[str, None] = None):
    """Multiply selected off-diagonal entries by factor."""
    sim = copy.deepcopy(sim)

    def _apply_to_df(df):
        m1 = data_frame_mask(df, ".", col_pattern)
        m2 = data_frame_mask(df, row_pattern, ".")
        mask = (m1 | m2)
        np.fill_diagonal(mask, False)
        df.values[mask] = df.values[mask] * factor

    cov = sim.parameters["copula"]
    _apply_to_groups(cov, group, _apply_to_df)
    return sim


def replace_param(sim, path: Sequence[str], new_param):
    """Substitute a new parameter for an old one.

    Use the path to the parameter starting from sim.parameters to identify the
    parameter to transform. Examples: ['marginal','mean'] or
    ['copula','group_name']
    """
    sim = copy.deepcopy(sim)
    if path[0] == "marginal":
        param = path[1]
        mat = np.asarray(new_param)
        _update_marginal_param(sim, param, mat)

    if path[0] == "copula":
        key = path[1]
        cov = sim.parameters["copula"]
        if isinstance(cov, dict):
            cov[key] = new_param
        else:
            sim.parameters["copula"] = new_param

    return sim


###############################################################################
## Helper functions used throughout
###############################################################################

def str_match(string: str, string_list: list) -> bool:
    for l in string_list:
        if l in string:
            return True
    return False


def data_frame_mask(df, row_pattern=".", col_pattern=".") -> np.array:
    """Return a boolean mask for a pandas DataFrame where rows/cols match regex patterns.

    The returned mask has shape (n_rows, n_cols) and is True where the
    intersection of matched rows and matched columns occurs.
    """
    mask = np.zeros(df.shape, dtype=bool)
    if isinstance(col_pattern, str):
        col_pattern = [col_pattern]
    if isinstance(row_pattern, str):
        row_pattern = [row_pattern]

    # check for columns that match at least one pattern
    col_matches = np.zeros(df.shape[1], dtype=bool)
    for col in df.columns:
        if any(re.search(pat, col) for pat in col_pattern):
            col_matches[df.columns.get_loc(col)] = True

    # same with rows
    row_matches = np.zeros(df.shape[0], dtype=bool)
    for idx in df.index:
        if any(re.search(pat, str(idx)) for pat in row_pattern):
            row_matches[df.index.get_loc(idx)] = True

    # set mask to the intersection of row and column matches
    mask = np.outer(row_matches, col_matches)
    return mask


def _apply_to_groups(cov_obj, grp, f):
    """Apply f to either all group arrays or a single group's array."""
    if isinstance(cov_obj, dict):
        if grp is None:
            for k in list(cov_obj.keys()):
                f(cov_obj[k])
        else:
            f(cov_obj[grp])
    else:
        f(cov_obj)


def _update_marginal_param(sim, param: str, mat: np.ndarray):
    """Update the torch Parameter for a marginal `param`"""
    tensor = sim.marginal.predict.coefs[param]
    with torch.no_grad():
        t = torch.from_numpy(np.asarray(mat)).to(dtype=tensor.dtype, device=tensor.device)
        tensor.copy_(t)
    sim.parameters["marginal"][param].values[:] = tensor.detach().cpu().numpy()
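How these transforms might be chained (an illustrative sketch, not part of the package): the `sim` object, its `parameters["marginal"]` coefficient DataFrames, and the pattern strings below are assumptions inferred from the attribute accesses in this file, not a documented API.

# Hypothetical usage, assuming `sim` is a fitted simulator whose marginal "mean"
# coefficients are stored as a DataFrame (covariate rows x gene columns).
perturbed = nullify(sim, row_pattern="condition", col_pattern="^CD3", param="mean")
perturbed = amplify(perturbed, factor=2.0, row_pattern="Intercept", col_pattern="^MT-", param="mean")
perturbed = decorrelate(perturbed, row_pattern="^CD3", col_pattern="^CD3", group=None)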
scdesigner/minimal/zero_inflated_negbin.py
@@ -0,0 +1,86 @@
from .formula import standardize_formula
from .marginal import GLMPredictor, Marginal
from .loader import _to_numpy
from typing import Union, Dict, Optional
import torch
import numpy as np
from scipy.stats import nbinom, bernoulli

class ZeroInflatedNegBin(Marginal):
    """Zero-inflated negative-binomial marginal estimator"""
    def __init__(self, formula: Union[Dict, str]):
        formula = standardize_formula(formula, allowed_keys=['mean', 'dispersion', 'zero_inflation'])
        super().__init__(formula)

    def setup_optimizer(
        self,
        optimizer_class: Optional[callable] = torch.optim.Adam,
        **optimizer_kwargs,
    ):
        if self.loader is None:
            raise RuntimeError("self.loader is not set (call setup_data first)")

        link_funs = {
            "mean": torch.exp,
            "dispersion": torch.exp,
            "zero_inflation": torch.sigmoid,
        }
        nll = lambda batch: -self.likelihood(batch).sum()
        self.predict = GLMPredictor(
            n_outcomes=self.n_outcomes,
            feature_dims=self.feature_dims,
            link_fns=link_funs,
            loss_fn=nll,
            optimizer_class=optimizer_class,
            optimizer_kwargs=optimizer_kwargs
        )

    def likelihood(self, batch):
        """Compute the elementwise log-likelihood (negated and summed by the optimizer loss)"""
        y, x = batch
        params = self.predict(x)
        mu = params.get("mean")
        r = params.get("dispersion")
        pi = params.get("zero_inflation")

        # negative binomial component
        negbin_loglikelihood = (
            torch.lgamma(y + r)
            - torch.lgamma(r)
            - torch.lgamma(y + 1)
            + r * torch.log(r)
            + y * torch.log(mu)
            - (r + y) * torch.log(r + mu)
        )

        # return the mixture, with an offset to prevent log(0)
        return torch.log(pi * (y == 0) + (1 - pi) * torch.exp(negbin_loglikelihood) + 1e-10)

    def invert(self, u: torch.Tensor, x: Dict[str, torch.Tensor]):
        """Invert pseudo-observations."""
        mu, r, pi, u = self._local_params(x, u)
        y = nbinom(n=r, p=r / (r + mu)).ppf(u)
        delta = bernoulli(1 - pi).ppf(u)
        return torch.from_numpy(y * delta).float()

    def uniformize(self, y: torch.Tensor, x: Dict[str, torch.Tensor], epsilon=1e-6):
        """Return uniformized pseudo-observations for counts y given covariates x."""
        # cdf values using scipy's parameterization
        mu, r, pi, y = self._local_params(x, y)
        nb_distn = nbinom(n=r, p=r / (r + mu))
        u1 = pi + (1 - pi) * nb_distn.cdf(y)
        u2 = np.where(y > 0, pi + (1 - pi) * nb_distn.cdf(y - 1), 0)

        # randomize within discrete mass to get uniform(0,1)
        v = np.random.uniform(size=y.shape)
        u = np.clip(v * u1 + (1 - v) * u2, epsilon, 1 - epsilon)
        return torch.from_numpy(u).float()

    def _local_params(self, x, y=None):
        params = self.predict(x)
        mu = params.get('mean')
        r = params.get('dispersion')
        pi = params.get('zero_inflation')
        if y is None:
            return _to_numpy(mu, pi, r)
        return _to_numpy(mu, r, pi, y)
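The `uniformize` method above applies a randomized probability integral transform to the discrete zero-inflated counts: draw u uniformly between the mixture CDF at y - 1 and at y, so u is approximately Uniform(0, 1) under the model. A self-contained numpy/scipy sketch of the same trick, with made-up toy parameters (not part of the package):

import numpy as np
from scipy.stats import nbinom

rng = np.random.default_rng(0)
mu, r, pi = 5.0, 2.0, 0.3                      # toy ZINB parameters (assumed for illustration)
nb = nbinom(n=r, p=r / (r + mu))

# simulate from the zero-inflated model
y = nb.rvs(size=10_000, random_state=rng) * (rng.uniform(size=10_000) > pi)

# upper and lower mixture CDF values at y and y - 1
u1 = pi + (1 - pi) * nb.cdf(y)
u2 = np.where(y > 0, pi + (1 - pi) * nb.cdf(y - 1), 0.0)

# randomize within the discrete mass; the result should look Uniform(0, 1)
v = rng.uniform(size=y.shape)
u = np.clip(v * u1 + (1 - v) * u2, 1e-6, 1 - 1e-6)
print(u.mean(), u.std())                       # roughly 0.5 and 0.29 if the transform is correct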
scdesigner/predictors/__init__.py
@@ -0,0 +1,15 @@
from .bernoulli import bernoulli_predict
from .negbin import negbin_predict
from .poisson import poisson_predict
from .gaussian import gaussian_predict
from .zero_inflated_negbin import zero_inflated_negbin_predict
from .zero_inflated_poisson import zero_inflated_poisson_predict

__all__ = [
    "bernoulli_predict",
    "gaussian_predict",
    "negbin_predict",
    "poisson_predict",
    "zero_inflated_negbin_predict",
    "zero_inflated_poisson_predict",
]
scdesigner/predictors/gaussian.py
@@ -0,0 +1,16 @@
import numpy as np
import pandas as pd
from ..format import format_matrix
from typing import Union

def gaussian_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
    # Standardize formula to dictionary format
    if isinstance(formula, str):
        formula = {'mean': formula, 'sdev': '~ 1'}

    x_mean = format_matrix(obs, formula["mean"])
    x_dispersion = format_matrix(obs, formula["sdev"])

    sigma = np.exp(x_dispersion @ parameters["coef_sdev"])
    mu = x_mean @ parameters["coef_mean"]
    return {"mean": mu, "sdev": sigma}
scdesigner/predictors/negbin.py
@@ -0,0 +1,17 @@
import numpy as np
import pandas as pd
from ..format import format_matrix
from typing import Union

def negbin_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
    # Standardize formula to dictionary format
    if isinstance(formula, str):
        formula = {'mean': formula, 'dispersion': '~ 1'}

    x_mean = format_matrix(obs, formula["mean"])
    x_dispersion = format_matrix(obs, formula["dispersion"])

    r = np.exp(x_dispersion @ parameters["coef_dispersion"])
    mu = np.exp(x_mean @ parameters["coef_mean"])
    # r and mu are still dataframes with column names being the gene names
    return {"mean": mu, "dispersion": r}
scdesigner/predictors/poisson.py
@@ -0,0 +1,12 @@
import numpy as np
import pandas as pd
from ..format import format_matrix
from typing import Union

def poisson_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
    if isinstance(formula, dict):
        formula = formula['mean']
    x = format_matrix(obs, formula)
    mu = np.exp(x @ parameters["coef_mean"])
    return {"mean": mu}
scdesigner/predictors/zero_inflated_negbin.py
@@ -0,0 +1,18 @@
import numpy as np
import pandas as pd
from ..format import format_matrix
from scipy.special import expit
from typing import Union

def zero_inflated_negbin_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
    if isinstance(formula, str):
        formula = {"mean": formula, "dispersion": "~ 1", "zero_inflation": "~ 1"}
    x_mean = format_matrix(obs, formula["mean"])
    x_dispersion = format_matrix(obs, formula["dispersion"])
    x_zero_inflation = format_matrix(obs, formula["zero_inflation"])
    r, mu, pi = (
        np.exp(x_dispersion @ parameters["coef_dispersion"]),
        np.exp(x_mean @ parameters["coef_mean"]),
        expit(x_zero_inflation @ parameters["coef_zero_inflation"]),
    )
    return {"mean": mu, "dispersion": r, "zero_inflation": pi}
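For reference, a minimal standalone sketch (not part of the package) of the link functions this predictor applies: log links keep the mean and dispersion positive, and a logistic link keeps the zero-inflation probability in (0, 1). The toy design matrix and coefficients below are made up.

import numpy as np
from scipy.special import expit

rng = np.random.default_rng(1)
x = np.column_stack([np.ones(5), rng.normal(size=5)])   # toy design matrix: intercept + one covariate
coef_mean = np.array([[1.0], [0.5]])                    # one gene, shape (covariates, genes)
coef_dispersion = np.array([[0.0], [0.0]])
coef_zero_inflation = np.array([[-1.0], [0.0]])

mu = np.exp(x @ coef_mean)                 # positive via the log link
r = np.exp(x @ coef_dispersion)            # positive via the log link
pi = expit(x @ coef_zero_inflation)        # in (0, 1) via the logit link
print(mu.shape, r.min() > 0, ((0 < pi) & (pi < 1)).all())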
scdesigner/predictors/zero_inflated_poisson.py
@@ -0,0 +1,18 @@
from ..format import format_matrix
from typing import Union
import numpy as np
import pandas as pd


def zero_inflated_poisson_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
    # standardize formula to dictionary format; keys must match the lookups below
    if isinstance(formula, str):
        formula = {'mean': formula, 'zero_inflation': '~ 1'}
    mu, pi = (
        np.exp(format_matrix(obs, formula['mean']) @ parameters["coef_mean"]),
        sigmoid(format_matrix(obs, formula['zero_inflation']) @ parameters["coef_zero_inflation"]),
    )
    return {"mean": mu, "zero_inflation": pi}


def sigmoid(x):
    return 1 / (1 + np.exp(-x))
scdesigner/samplers/__init__.py
@@ -0,0 +1,23 @@
from .negbin import negbin_sample, negbin_copula_sample
from .poisson import poisson_sample, poisson_copula_sample
from .bernoulli import bernoulli_sample, bernoulli_copula_sample
from .gaussian import gaussian_regression_sample, gaussian_copula_sample
from .zero_inflated_negbin import (
    zero_inflated_negbin_sample,
    zero_inflated_negbin_copula_sample,
)
from .zero_inflated_poisson import zero_inflated_poisson_sample

__all__ = [
    "negbin_sample",
    "negbin_copula_sample",
    "poisson_sample",
    "poisson_copula_sample",
    "bernoulli_sample",
    "bernoulli_copula_sample",
    "gaussian_regression_sample",
    "gaussian_copula_sample",
    "zero_inflated_negbin_sample",
    "zero_inflated_negbin_copula_sample",
    "zero_inflated_poisson_sample",
]
scdesigner/samplers/bernoulli.py
@@ -0,0 +1,27 @@
from . import glm_factory as glm
from scipy.stats import bernoulli
from typing import Union
import numpy as np


def bernoulli_regression_sample_array(local_parameters: dict) -> np.array:
    theta = local_parameters["mean"]
    return bernoulli(theta).rvs()


def bernoulli_copula_sample_array(
    local_parameters: dict, covariance: Union[dict, np.array], groups: dict
) -> np.array:
    # initialize uniformized gaussian samples
    N, G = local_parameters["mean"].shape
    u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)

    theta = local_parameters["mean"]
    return bernoulli(theta).ppf(u)


bernoulli_sample = glm.glm_sample_factory(bernoulli_regression_sample_array)

bernoulli_copula_sample = glm.gaussian_copula_sample_factory(
    bernoulli_copula_sample_array
)
scdesigner/samplers/gaussian.py
@@ -0,0 +1,25 @@
from scipy.stats import norm
from . import glm_factory as glm
from typing import Union
import numpy as np


def gaussian_regression_sample_array(local_parameters: dict) -> np.array:
    sigma, mu = local_parameters["sdev"], local_parameters["mean"]  # dataframes of shape (n, g)
    return norm(loc=mu, scale=sigma).rvs()


def gaussian_copula_sample_array(
    local_parameters: dict, covariance: Union[dict, np.array], groups: dict
) -> np.array:
    # initialize uniformized gaussian samples
    N, G = local_parameters["mean"].shape
    u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)

    # transform the correlated uniforms to gaussian space
    sigma, mu = local_parameters["sdev"], local_parameters["mean"]
    return norm(loc=mu, scale=sigma).ppf(u)


gaussian_regression_sample = glm.glm_sample_factory(gaussian_regression_sample_array)
gaussian_copula_sample = glm.gaussian_copula_sample_factory(gaussian_copula_sample_array)
scdesigner/samplers/glm_factory.py
@@ -0,0 +1,41 @@
import numpy as np
import pandas as pd
import anndata as ad
from typing import Union
from scipy.stats import norm


def glm_sample_factory(sample_array):
    def sampler(local_parameters: dict, obs: pd.DataFrame) -> ad.AnnData:
        samples = sample_array(local_parameters)
        result = ad.AnnData(X=samples, obs=obs)
        result.var_names = local_parameters["mean"].columns
        return result
    return sampler

def gaussian_copula_pseudo_obs(N, G, sigma, groups):
    u = np.zeros((N, G))

    # a single shared covariance matrix is broadcast to every group
    if not isinstance(sigma, dict):
        sigma = {group: sigma for group in groups}

    # cycle across groups
    for group, ix in groups.items():
        z = np.random.multivariate_normal(
            mean=np.zeros(G), cov=sigma[group], size=len(ix)
        )
        normal_distn = norm(0, np.diag(sigma[group] ** 0.5))
        u[ix] = normal_distn.cdf(z)
    return u


def gaussian_copula_sample_factory(copula_sample_array):
    def sampler(
        local_parameters: dict, covariance: Union[dict, np.array], groups: dict, obs: pd.DataFrame
    ) -> ad.AnnData:
        samples = copula_sample_array(local_parameters, covariance, groups)
        result = ad.AnnData(X=samples, obs=obs)
        result.var_names = local_parameters["mean"].columns
        return result
    return sampler
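A self-contained sketch (not part of the package) of what `gaussian_copula_pseudo_obs` produces for a single group: latent correlated Gaussians are pushed through their marginal normal CDFs, so each column becomes marginally Uniform(0, 1) while keeping the correlation of the toy covariance below.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(2)
G = 2
sigma = np.array([[1.0, 0.8], [0.8, 1.0]])    # strongly correlated pair of genes (toy values)
ix = np.arange(1000)                          # indices of the cells in this hypothetical group

z = rng.multivariate_normal(mean=np.zeros(G), cov=sigma, size=len(ix))
u = norm(0, np.sqrt(np.diag(sigma))).cdf(z)   # marginal CDF -> each column is Uniform(0, 1)

# the marginals are uniform, but the dependence induced by sigma is preserved
print(np.corrcoef(u.T)[0, 1])                 # clearly positive, close to the latent correlation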
scdesigner/samplers/negbin.py
@@ -0,0 +1,25 @@
from scipy.stats import nbinom
from . import glm_factory as glm
from typing import Union
import numpy as np


def negbin_regression_sample_array(local_parameters: dict) -> np.array:
    r, mu = local_parameters["dispersion"], local_parameters["mean"]  # dataframes of shape (n, g)
    return nbinom(n=r, p=r / (r + mu)).rvs()


def negbin_copula_sample_array(
    local_parameters: dict, covariance: Union[dict, np.array], groups: dict
) -> np.array:
    # initialize uniformized gaussian samples
    N, G = local_parameters["mean"].shape
    u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)

    # transform the correlated uniforms to NB space
    r, mu = local_parameters["dispersion"], local_parameters["mean"]
    return nbinom(n=r, p=r / (r + mu)).ppf(u)


negbin_sample = glm.glm_sample_factory(negbin_regression_sample_array)
negbin_copula_sample = glm.gaussian_copula_sample_factory(negbin_copula_sample_array)
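The samplers above use a (mean, dispersion) parameterization, converted to scipy's (n, p) as n = r and p = r / (r + mu). A quick standalone check (not part of the package) that this yields mean mu and variance mu + mu^2 / r, with made-up toy values:

import numpy as np
from scipy.stats import nbinom

mu, r = 4.0, 2.0
nb = nbinom(n=r, p=r / (r + mu))
y = nb.rvs(size=200_000, random_state=np.random.default_rng(3))
print(y.mean(), y.var())     # roughly 4.0 and 4.0 + 16.0 / 2.0 = 12.0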
scdesigner/samplers/poisson.py
@@ -0,0 +1,25 @@
from . import glm_factory as glm
from scipy.stats import poisson
from typing import Union
import numpy as np


def poisson_regression_sample_array(local_parameters: dict) -> np.array:
    mu = local_parameters["mean"]
    return poisson(mu).rvs()


def poisson_copula_sample_array(
    local_parameters: dict, covariance: Union[dict, np.array], groups: dict
) -> np.array:
    # initialize uniformized gaussian samples
    N, G = local_parameters["mean"].shape
    u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)

    # invert using poisson margins
    mu = local_parameters["mean"]
    return poisson(mu).ppf(u)


poisson_sample = glm.glm_sample_factory(poisson_regression_sample_array)
poisson_copula_sample = glm.gaussian_copula_sample_factory(poisson_copula_sample_array)
scdesigner/samplers/zero_inflated_negbin.py
@@ -0,0 +1,40 @@
from scipy.stats import nbinom, bernoulli
from . import glm_factory as glm
from typing import Union
import numpy as np


def zero_inflated_negbin_sample_array(local_parameters: dict) -> np.array:
    mu, r, pi = (
        local_parameters["mean"],
        local_parameters["dispersion"],
        local_parameters["zero_inflation"],
    )
    return nbinom(n=r, p=r / (r + mu)).rvs() * bernoulli(1 - pi).rvs()


def zero_inflated_negbin_copula_sample_array(
    local_parameters: dict, covariance: Union[dict, np.array], groups: dict
) -> np.array:
    # initialize uniformized gaussian samples
    N, G = local_parameters["mean"].shape
    u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)

    # get zero inflated NB parameters
    mu, r, pi = (
        local_parameters["mean"],
        local_parameters["dispersion"],
        local_parameters["zero_inflation"],
    )

    # zero inflate after first simulating from NB
    positive_part = nbinom(n=r, p=r / (r + mu)).ppf(u)
    zero_inflation = bernoulli(1 - pi).ppf(u)
    return zero_inflation * positive_part


zero_inflated_negbin_sample = glm.glm_sample_factory(zero_inflated_negbin_sample_array)

zero_inflated_negbin_copula_sample = glm.gaussian_copula_sample_factory(
    zero_inflated_negbin_copula_sample_array
)
scdesigner/samplers/zero_inflated_poisson.py
@@ -0,0 +1,16 @@
from scipy.stats import poisson, bernoulli
from . import glm_factory as glm
import numpy as np


def zero_inflated_poisson_sample_array(local_parameters: dict) -> np.array:
    mu, pi = (
        local_parameters["mean"],
        local_parameters["zero_inflation"],
    )
    return poisson(mu).rvs() * bernoulli(1 - pi).rvs()


zero_inflated_poisson_sample = glm.glm_sample_factory(
    zero_inflated_poisson_sample_array
)
scdesigner/simulators/__init__.py
@@ -0,0 +1,31 @@
from .composite_regressor import CompositeGLMSimulator
from .glm_simulator import (
    BernoulliCopulaSimulator,
    BernoulliRegressionSimulator,
    NegBinCopulaSimulator,
    NegBinRegressionSimulator,
    PoissonCopulaSimulator,
    PoissonRegressionSimulator,
    GaussianRegressionSimulator,
    GaussianCopulaSimulator,
    ZeroInflatedNegBinCopulaSimulator,
    ZeroInflatedNegBinRegressionSimulator,
    ZeroInflatedPoissonRegressionSimulator,
)
from .pnmf_regression import PNMFRegressionSimulator

__all__ = [
    "BernoulliCopulaSimulator",
    "BernoulliRegressionSimulator",
    "CompositeGLMSimulator",
    "GaussianRegressionSimulator",
    "GaussianCopulaSimulator",
    "NegBinCopulaSimulator",
    "NegBinRegressionSimulator",
    "PNMFRegressionSimulator",
    "PoissonCopulaSimulator",
    "PoissonRegressionSimulator",
    "ZeroInflatedNegBinCopulaSimulator",
    "ZeroInflatedNegBinRegressionSimulator",
    "ZeroInflatedPoissonRegressionSimulator",
]