scdesigner 0.0.5__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdesigner/base/__init__.py +8 -0
- scdesigner/base/copula.py +416 -0
- scdesigner/base/marginal.py +391 -0
- scdesigner/base/simulator.py +59 -0
- scdesigner/copulas/__init__.py +8 -0
- scdesigner/copulas/standard_copula.py +645 -0
- scdesigner/datasets/__init__.py +5 -0
- scdesigner/datasets/pancreas.py +39 -0
- scdesigner/distributions/__init__.py +19 -0
- scdesigner/{minimal → distributions}/bernoulli.py +42 -14
- scdesigner/distributions/gaussian.py +114 -0
- scdesigner/distributions/negbin.py +121 -0
- scdesigner/distributions/negbin_irls.py +72 -0
- scdesigner/distributions/negbin_irls_funs.py +456 -0
- scdesigner/distributions/poisson.py +88 -0
- scdesigner/{minimal → distributions}/zero_inflated_negbin.py +39 -10
- scdesigner/distributions/zero_inflated_poisson.py +103 -0
- scdesigner/simulators/__init__.py +24 -28
- scdesigner/simulators/composite.py +239 -0
- scdesigner/simulators/positive_nonnegative_matrix_factorization.py +477 -0
- scdesigner/simulators/scd3.py +486 -0
- scdesigner/transform/__init__.py +8 -6
- scdesigner/{minimal → transform}/transform.py +1 -1
- scdesigner/{minimal → utils}/kwargs.py +4 -1
- {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/METADATA +1 -1
- scdesigner-0.0.10.dist-info/RECORD +28 -0
- {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/WHEEL +1 -1
- scdesigner/data/__init__.py +0 -16
- scdesigner/data/formula.py +0 -137
- scdesigner/data/group.py +0 -123
- scdesigner/data/sparse.py +0 -39
- scdesigner/diagnose/__init__.py +0 -65
- scdesigner/diagnose/aic_bic.py +0 -119
- scdesigner/diagnose/plot.py +0 -242
- scdesigner/estimators/__init__.py +0 -32
- scdesigner/estimators/bernoulli.py +0 -85
- scdesigner/estimators/gaussian.py +0 -121
- scdesigner/estimators/gaussian_copula_factory.py +0 -367
- scdesigner/estimators/glm_factory.py +0 -75
- scdesigner/estimators/negbin.py +0 -153
- scdesigner/estimators/pnmf.py +0 -160
- scdesigner/estimators/poisson.py +0 -124
- scdesigner/estimators/zero_inflated_negbin.py +0 -195
- scdesigner/estimators/zero_inflated_poisson.py +0 -85
- scdesigner/format/__init__.py +0 -4
- scdesigner/format/format.py +0 -20
- scdesigner/format/print.py +0 -30
- scdesigner/minimal/__init__.py +0 -17
- scdesigner/minimal/composite.py +0 -119
- scdesigner/minimal/copula.py +0 -205
- scdesigner/minimal/formula.py +0 -23
- scdesigner/minimal/gaussian.py +0 -65
- scdesigner/minimal/loader.py +0 -211
- scdesigner/minimal/marginal.py +0 -154
- scdesigner/minimal/negbin.py +0 -73
- scdesigner/minimal/positive_nonnegative_matrix_factorization.py +0 -231
- scdesigner/minimal/scd3.py +0 -96
- scdesigner/minimal/scd3_instances.py +0 -50
- scdesigner/minimal/simulator.py +0 -25
- scdesigner/minimal/standard_copula.py +0 -383
- scdesigner/predictors/__init__.py +0 -15
- scdesigner/predictors/bernoulli.py +0 -9
- scdesigner/predictors/gaussian.py +0 -16
- scdesigner/predictors/negbin.py +0 -17
- scdesigner/predictors/poisson.py +0 -12
- scdesigner/predictors/zero_inflated_negbin.py +0 -18
- scdesigner/predictors/zero_inflated_poisson.py +0 -18
- scdesigner/samplers/__init__.py +0 -23
- scdesigner/samplers/bernoulli.py +0 -27
- scdesigner/samplers/gaussian.py +0 -25
- scdesigner/samplers/glm_factory.py +0 -103
- scdesigner/samplers/negbin.py +0 -25
- scdesigner/samplers/poisson.py +0 -25
- scdesigner/samplers/zero_inflated_negbin.py +0 -40
- scdesigner/samplers/zero_inflated_poisson.py +0 -16
- scdesigner/simulators/composite_regressor.py +0 -72
- scdesigner/simulators/glm_simulator.py +0 -167
- scdesigner/simulators/pnmf_regression.py +0 -61
- scdesigner/transform/amplify.py +0 -14
- scdesigner/transform/mask.py +0 -33
- scdesigner/transform/nullify.py +0 -25
- scdesigner/transform/split.py +0 -23
- scdesigner/transform/substitute.py +0 -14
- scdesigner-0.0.5.dist-info/RECORD +0 -66
scdesigner/samplers/negbin.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
from scipy.stats import nbinom
|
|
2
|
-
from . import glm_factory as glm
|
|
3
|
-
from typing import Union
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def negbin_regression_sample_array(local_parameters: dict) -> np.array:
|
|
8
|
-
r, mu = local_parameters["dispersion"], local_parameters["mean"] # dataframes of shape (n, g)
|
|
9
|
-
return nbinom(n=r, p=r / (r + mu)).rvs()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def negbin_copula_sample_array(
|
|
13
|
-
local_parameters: dict, covariance: Union[dict, np.array], groups: dict
|
|
14
|
-
) -> np.array:
|
|
15
|
-
# initialize uniformized gaussian samples
|
|
16
|
-
N, G = local_parameters["mean"].shape
|
|
17
|
-
u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
|
|
18
|
-
|
|
19
|
-
# transform the correlated uniforms to NB space
|
|
20
|
-
r, mu = local_parameters["dispersion"], local_parameters["mean"]
|
|
21
|
-
return nbinom(n=r, p=r / (r + mu)).ppf(u)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
negbin_sample = glm.glm_sample_factory(negbin_regression_sample_array)
|
|
25
|
-
negbin_copula_sample = glm.gaussian_copula_sample_factory(negbin_copula_sample_array)
|
scdesigner/samplers/poisson.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
from . import glm_factory as glm
|
|
2
|
-
from scipy.stats import poisson
|
|
3
|
-
from typing import Union
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def poisson_regression_sample_array(local_parameters: dict) -> np.array:
|
|
8
|
-
mu = local_parameters["mean"]
|
|
9
|
-
return poisson(mu).rvs()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def poisson_copula_sample_array(
|
|
13
|
-
local_parameters: dict, covariance: Union[dict, np.array], groups: dict
|
|
14
|
-
) -> np.array:
|
|
15
|
-
# initialize uniformized gaussian samples
|
|
16
|
-
N, G = local_parameters["mean"].shape
|
|
17
|
-
u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
|
|
18
|
-
|
|
19
|
-
# invert using poisson margins
|
|
20
|
-
mu = local_parameters["mean"]
|
|
21
|
-
return poisson(mu).ppf(u)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
poisson_sample = glm.glm_sample_factory(poisson_regression_sample_array)
|
|
25
|
-
poisson_copula_sample = glm.gaussian_copula_sample_factory(poisson_copula_sample_array)
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
from scipy.stats import nbinom, bernoulli
|
|
2
|
-
from . import glm_factory as glm
|
|
3
|
-
from typing import Union
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def zero_inflated_negbin_sample_array(local_parameters: dict) -> np.array:
|
|
8
|
-
mu, r, pi = (
|
|
9
|
-
local_parameters["mean"],
|
|
10
|
-
local_parameters["dispersion"],
|
|
11
|
-
local_parameters["zero_inflation"],
|
|
12
|
-
)
|
|
13
|
-
return nbinom(n=r, p=r / (r + mu)).rvs() * bernoulli(1 - pi).rvs()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def zero_inflated_negbin_copula_sample_array(
|
|
17
|
-
local_parameters: dict, covariance: Union[dict, np.array], groups: dict
|
|
18
|
-
) -> np.array:
|
|
19
|
-
# initialize uniformized gaussian samples
|
|
20
|
-
N, G = local_parameters["mean"].shape
|
|
21
|
-
u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
|
|
22
|
-
|
|
23
|
-
# get zero inflated NB parameters
|
|
24
|
-
mu, r, pi = (
|
|
25
|
-
local_parameters["mean"],
|
|
26
|
-
local_parameters["dispersion"],
|
|
27
|
-
local_parameters["zero_inflation"],
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
# zero inflate after first simulating from NB
|
|
31
|
-
positive_part = nbinom(n=r, p=r / (r + mu)).ppf(u)
|
|
32
|
-
zero_inflation = bernoulli(1 - pi).ppf(u)
|
|
33
|
-
return zero_inflation * positive_part
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
zero_inflated_negbin_sample = glm.glm_sample_factory(zero_inflated_negbin_sample_array)
|
|
37
|
-
|
|
38
|
-
zero_inflated_negbin_copula_sample = glm.gaussian_copula_sample_factory(
|
|
39
|
-
zero_inflated_negbin_copula_sample_array
|
|
40
|
-
)
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from scipy.stats import poisson, bernoulli
|
|
2
|
-
from . import glm_factory as glm
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def zero_inflated_poisson_sample_array(local_parameters: dict) -> np.array:
|
|
7
|
-
mu, pi = (
|
|
8
|
-
local_parameters["mean"],
|
|
9
|
-
local_parameters["zero_inflation"],
|
|
10
|
-
)
|
|
11
|
-
return poisson(mu).rvs() * bernoulli(1 - pi).rvs()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
zero_inflated_poisson_sample = glm.glm_sample_factory(
|
|
15
|
-
zero_inflated_poisson_sample_array
|
|
16
|
-
)
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import anndata
|
|
2
|
-
import pandas as pd
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class CompositeGLMSimulator:
|
|
6
|
-
def __init__(self, specification: dict, **kwargs):
|
|
7
|
-
self.specification = specification
|
|
8
|
-
self.params = {}
|
|
9
|
-
self.hyperparams = kwargs
|
|
10
|
-
|
|
11
|
-
for k in self.specification:
|
|
12
|
-
if "_fitted" not in self.specification[k].keys():
|
|
13
|
-
self.specification[k]["_fitted"] = False
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def fit(self, adata: anndata.AnnData) -> dict:
|
|
17
|
-
self.specification = fill_var_names(self.specification, list(adata.var_names))
|
|
18
|
-
|
|
19
|
-
for k, spec in self.specification.items():
|
|
20
|
-
if not spec["_fitted"]:
|
|
21
|
-
spec["simulator"].fit(adata[:, spec["var_names"]], spec["formula"])
|
|
22
|
-
self.params[k] = subset_params(spec["simulator"].params, list(spec["var_names"]))
|
|
23
|
-
self.specification[k]["simulator"].params = self.params[k]
|
|
24
|
-
self.specification[k]["_fitted"] = True
|
|
25
|
-
|
|
26
|
-
def sample(self, obs: pd.DataFrame) -> anndata.AnnData:
|
|
27
|
-
anndata_list = []
|
|
28
|
-
for spec in self.specification.values():
|
|
29
|
-
anndata_list.append(spec["simulator"].sample(obs))
|
|
30
|
-
return anndata.concat(anndata_list, axis="var")
|
|
31
|
-
|
|
32
|
-
def predict(self, obs: pd.DataFrame) -> dict:
|
|
33
|
-
preds = {}
|
|
34
|
-
for k, spec in self.specification.items():
|
|
35
|
-
preds[k] = spec["simulator"].predict(obs)
|
|
36
|
-
return preds
|
|
37
|
-
|
|
38
|
-
def __repr__(self):
|
|
39
|
-
var_names = {k: list_string(v["var_names"]) for k, v in self.specification.items()}
|
|
40
|
-
simulators = {k: v["simulator"] for k, v in self.specification.items()}
|
|
41
|
-
return f"""scDesigner simulator object with
|
|
42
|
-
method: 'Composite'
|
|
43
|
-
features: {var_names}
|
|
44
|
-
simulators: {simulators}"""
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def list_string(l):
|
|
48
|
-
if l is None:
|
|
49
|
-
return
|
|
50
|
-
if len(l) <= 3:
|
|
51
|
-
return ", ".join(l)
|
|
52
|
-
return f"[{l[0]},{l[1]}, ..., {l[-1]}]"
|
|
53
|
-
|
|
54
|
-
def fill_var_names(specification, var_names):
|
|
55
|
-
all_names = []
|
|
56
|
-
for k, v in specification.items():
|
|
57
|
-
if v["var_names"] is not None:
|
|
58
|
-
all_names += list(v["var_names"])
|
|
59
|
-
|
|
60
|
-
for k, v in specification.items():
|
|
61
|
-
if v["var_names"] is None:
|
|
62
|
-
specification[k]["var_names"] = list(set(var_names) - set(all_names))
|
|
63
|
-
return specification
|
|
64
|
-
|
|
65
|
-
def subset_params(params, var_names):
|
|
66
|
-
result = {}
|
|
67
|
-
for k, v in params.items():
|
|
68
|
-
if "covariance" in k:
|
|
69
|
-
result[k] = v.loc[var_names, var_names]
|
|
70
|
-
else:
|
|
71
|
-
result[k] = v.loc[:, var_names]
|
|
72
|
-
return result
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
from .. import estimators as est
|
|
2
|
-
from .. import predictors as prd
|
|
3
|
-
from .. import samplers as smp
|
|
4
|
-
from .. import diagnose
|
|
5
|
-
from anndata import AnnData
|
|
6
|
-
import pandas as pd
|
|
7
|
-
from typing import Union
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def glm_simulator_generator(class_name, regressor, sampler, predictor, diagnose):
|
|
11
|
-
def __init__(self, **kwargs):
|
|
12
|
-
self.formula = None # formula should be a string or a dictionary of strings
|
|
13
|
-
self.params = None
|
|
14
|
-
self.marginal_aic = None
|
|
15
|
-
self.marginal_bic = None
|
|
16
|
-
self.copula_aic = None
|
|
17
|
-
self.copula_bic = None
|
|
18
|
-
self.hyperparams = kwargs
|
|
19
|
-
self.filtered_kwargs = {k: kwargs[k] for k in ["chunk_size", "batch_size"] if k in kwargs}
|
|
20
|
-
|
|
21
|
-
if "Copula" not in class_name:
|
|
22
|
-
# fitting and sampling methods for plain regressors
|
|
23
|
-
def fit(self, adata: AnnData, formula: Union[str, dict]) -> dict:
|
|
24
|
-
self.formula = formula
|
|
25
|
-
self.params = regressor(adata, formula, **self.hyperparams)
|
|
26
|
-
self.marginal_aic, self.marginal_bic = diagnose(
|
|
27
|
-
self.params, adata, formula, **self.filtered_kwargs
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def sample(self, obs: pd.DataFrame) -> AnnData:
|
|
31
|
-
local_parameters = self.predict(obs) # a dictionary of parameters
|
|
32
|
-
return sampler(local_parameters, obs)
|
|
33
|
-
|
|
34
|
-
else:
|
|
35
|
-
# fitting and sampling for gaussian copula models
|
|
36
|
-
|
|
37
|
-
def fit(
|
|
38
|
-
self, adata: AnnData, formula: Union[str, dict] = "~ 1", copula_groups: str = None
|
|
39
|
-
) -> dict:
|
|
40
|
-
self.formula = formula
|
|
41
|
-
self.copula_groups = copula_groups
|
|
42
|
-
self.params = regressor(adata, formula, copula_groups, **self.hyperparams)
|
|
43
|
-
self.marginal_aic, self.marginal_bic, self.copula_aic, self.copula_bic = diagnose(
|
|
44
|
-
self.params, adata, formula, copula_groups, **self.filtered_kwargs
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
def sample(self, obs: pd.DataFrame) -> AnnData:
|
|
48
|
-
groups = est.gaussian_copula_factory.group_indices(self.copula_groups, obs)
|
|
49
|
-
local_parameters = self.predict(obs)
|
|
50
|
-
return sampler(local_parameters, self.params["covariance"], groups, obs)
|
|
51
|
-
|
|
52
|
-
def predict(self, obs: pd.DataFrame) -> dict:
|
|
53
|
-
return predictor(self.params, obs, self.formula)
|
|
54
|
-
# The predictor function should handle different formula types: dict or string
|
|
55
|
-
|
|
56
|
-
def __repr__(self):
|
|
57
|
-
params_string = ", ".join(
|
|
58
|
-
[
|
|
59
|
-
f"{k} [{self.params[k].shape[0]}x{self.params[k].shape[1]}]"
|
|
60
|
-
for k in self.params.keys()
|
|
61
|
-
]
|
|
62
|
-
)
|
|
63
|
-
return f"""scDesigner simulator object with
|
|
64
|
-
method: {self.__class__.__name__}
|
|
65
|
-
formula: {self.formula}
|
|
66
|
-
parameters: {params_string}"""
|
|
67
|
-
|
|
68
|
-
return type(
|
|
69
|
-
class_name,
|
|
70
|
-
(),
|
|
71
|
-
{
|
|
72
|
-
"__init__": __init__,
|
|
73
|
-
"fit": fit,
|
|
74
|
-
"sample": sample,
|
|
75
|
-
"predict": predict,
|
|
76
|
-
"__repr__": __repr__,
|
|
77
|
-
},
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
NegBinRegressionSimulator = glm_simulator_generator(
|
|
82
|
-
"NegBinRegressionSimulator",
|
|
83
|
-
est.negbin_regression,
|
|
84
|
-
smp.negbin_sample,
|
|
85
|
-
prd.negbin_predict,
|
|
86
|
-
diagnose.negbin_regression_diagnose
|
|
87
|
-
)
|
|
88
|
-
|
|
89
|
-
NegBinCopulaSimulator = glm_simulator_generator(
|
|
90
|
-
"NegBinCopulaSimulator",
|
|
91
|
-
est.negbin_copula,
|
|
92
|
-
smp.negbin_copula_sample,
|
|
93
|
-
prd.negbin_predict,
|
|
94
|
-
diagnose.negbin_gcopula_diagnose,
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
PoissonRegressionSimulator = glm_simulator_generator(
|
|
98
|
-
"PoissonRegressionSimulator",
|
|
99
|
-
est.poisson_regression,
|
|
100
|
-
smp.poisson_sample,
|
|
101
|
-
prd.poisson_predict,
|
|
102
|
-
diagnose.poisson_regression_diagnose,
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
PoissonCopulaSimulator = glm_simulator_generator(
|
|
106
|
-
"PoissonCopulaSimulator",
|
|
107
|
-
est.poisson_copula,
|
|
108
|
-
smp.poisson_copula_sample,
|
|
109
|
-
prd.poisson_predict,
|
|
110
|
-
diagnose.poisson_gcopula_diagnose,
|
|
111
|
-
)
|
|
112
|
-
|
|
113
|
-
BernoulliRegressionSimulator = glm_simulator_generator(
|
|
114
|
-
"BernoulliRegressionSimulator",
|
|
115
|
-
est.bernoulli_regression,
|
|
116
|
-
smp.bernoulli_sample,
|
|
117
|
-
prd.bernoulli_predict,
|
|
118
|
-
diagnose.bernoulli_regression_diagnose,
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
BernoulliCopulaSimulator = glm_simulator_generator(
|
|
122
|
-
"BernoulliCopulaSimulator",
|
|
123
|
-
est.bernoulli_copula,
|
|
124
|
-
smp.bernoulli_copula_sample,
|
|
125
|
-
prd.bernoulli_predict,
|
|
126
|
-
diagnose.bernoulli_gcopula_diagnose,
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
ZeroInflatedNegBinRegressionSimulator = glm_simulator_generator(
|
|
130
|
-
"ZeroInflatedNegbinRegressionSimulator",
|
|
131
|
-
est.zero_inflated_negbin_regression,
|
|
132
|
-
smp.zero_inflated_negbin_sample,
|
|
133
|
-
prd.zero_inflated_negbin_predict,
|
|
134
|
-
diagnose.zinb_regression_diagnose,
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
ZeroInflatedNegBinCopulaSimulator = glm_simulator_generator(
|
|
138
|
-
"ZeroInflatedNegBinCopulaSimulator",
|
|
139
|
-
est.zero_inflated_negbin_copula,
|
|
140
|
-
smp.zero_inflated_negbin_copula_sample,
|
|
141
|
-
prd.zero_inflated_negbin_predict,
|
|
142
|
-
diagnose.zinb_gcopula_diagnose,
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
ZeroInflatedPoissonRegressionSimulator = glm_simulator_generator(
|
|
146
|
-
"ZeroInflatedNegbinRegressionSimulator",
|
|
147
|
-
est.zero_inflated_poisson_regression,
|
|
148
|
-
smp.zero_inflated_poisson_sample,
|
|
149
|
-
prd.zero_inflated_poisson_predict,
|
|
150
|
-
diagnose.zip_regression_diagnose,
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
GaussianRegressionSimulator = glm_simulator_generator(
|
|
154
|
-
"GaussianRegressionSimulator",
|
|
155
|
-
est.gaussian_regression,
|
|
156
|
-
smp.gaussian_regression_sample,
|
|
157
|
-
prd.gaussian_predict,
|
|
158
|
-
diagnose.gaussian_regression_diagnose
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
GaussianCopulaSimulator = glm_simulator_generator(
|
|
162
|
-
"GaussianCopulaSimulator",
|
|
163
|
-
est.gaussian_copula,
|
|
164
|
-
smp.gaussian_copula_sample,
|
|
165
|
-
prd.gaussian_predict,
|
|
166
|
-
diagnose.gaussian_gcopula_diagnose
|
|
167
|
-
)
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
from anndata import AnnData
|
|
2
|
-
from formulaic import model_matrix
|
|
3
|
-
from ..format import format_input_anndata, format_matrix
|
|
4
|
-
from scipy.stats import gamma
|
|
5
|
-
from ..estimators.pnmf import pnmf, gamma_regression_array, format_gamma_parameters
|
|
6
|
-
import numpy as np
|
|
7
|
-
import pandas as pd
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class PNMFRegressionSimulator:
|
|
11
|
-
def __init__(self, nbase=20, maxIter=100, **kwargs): # default input: cell x gene
|
|
12
|
-
self.var_names = None
|
|
13
|
-
self.formula = None
|
|
14
|
-
self.params = None
|
|
15
|
-
self.hyperparams = {"pnmf": {"nbase": nbase, "maxIter": maxIter}, "gamma": kwargs}
|
|
16
|
-
|
|
17
|
-
def fit(self, adata, formula: str):
|
|
18
|
-
adata = format_input_anndata(adata)
|
|
19
|
-
self.var_names = adata.var_names
|
|
20
|
-
self.formula = formula
|
|
21
|
-
log_data = np.log1p(adata.X).T
|
|
22
|
-
W, S = pnmf(log_data, **self.hyperparams["pnmf"])
|
|
23
|
-
adata = AnnData(X=S.T, obs=adata.obs)
|
|
24
|
-
|
|
25
|
-
x = model_matrix(formula, adata.obs)
|
|
26
|
-
parameters = gamma_regression_array(np.array(x), adata.X, **self.hyperparams["gamma"])
|
|
27
|
-
parameters["W"] = W
|
|
28
|
-
self.params = format_gamma_parameters(
|
|
29
|
-
parameters, list(self.var_names), list(x.columns)
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
def sample(self, obs: pd.DataFrame) -> AnnData:
|
|
33
|
-
W = self.params["W"]
|
|
34
|
-
params = self.predict(obs)
|
|
35
|
-
a, loc, beta = params["a"], params["loc"], params["beta"]
|
|
36
|
-
sim_score = gamma(a, loc, 1 / beta).rvs()
|
|
37
|
-
samples = np.exp(W @ sim_score.T).T
|
|
38
|
-
|
|
39
|
-
# thresholding samples
|
|
40
|
-
floor = np.floor(samples)
|
|
41
|
-
samples = floor + np.where(samples - floor < 0.9, 0, 1) - 1
|
|
42
|
-
samples = np.where(samples < 0, 0, samples)
|
|
43
|
-
|
|
44
|
-
result = AnnData(X=samples, obs=obs)
|
|
45
|
-
result.var_names = self.var_names
|
|
46
|
-
return result
|
|
47
|
-
|
|
48
|
-
def predict(self, obs: pd.DataFrame) -> dict:
|
|
49
|
-
x = format_matrix(obs, self.formula)
|
|
50
|
-
a, loc, beta = (
|
|
51
|
-
x @ np.exp(self.params["a"]),
|
|
52
|
-
x @ self.params["loc"],
|
|
53
|
-
x @ np.exp(self.params["beta"]),
|
|
54
|
-
)
|
|
55
|
-
return {"a": a, "loc": loc, "beta": beta}
|
|
56
|
-
|
|
57
|
-
def __repr__(self):
|
|
58
|
-
return f"""scDesigner simulator object with
|
|
59
|
-
method: 'PNMF Regression'
|
|
60
|
-
formula: '{self.formula}'
|
|
61
|
-
parameters: 'a', 'loc', 'beta', 'W'"""
|
scdesigner/transform/amplify.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from typing import Union
|
|
3
|
-
from copy import deepcopy
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def amplify(
|
|
7
|
-
params: dict, id: str, mask: Union[np.array, None] = None, factor: float = 1
|
|
8
|
-
) -> dict:
|
|
9
|
-
if mask is None:
|
|
10
|
-
mask = np.ones(params[id].shape)
|
|
11
|
-
|
|
12
|
-
result = deepcopy(params)
|
|
13
|
-
result[id].values[mask] = factor * result[id].values[mask]
|
|
14
|
-
return result
|
scdesigner/transform/mask.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import re
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def str_match(string: str, string_list: list) -> bool:
|
|
6
|
-
for l in string_list:
|
|
7
|
-
if l in string:
|
|
8
|
-
return True
|
|
9
|
-
return False
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def data_frame_mask(df, row_pattern=".", col_pattern=".") -> np.array:
|
|
13
|
-
mask = np.zeros(df.shape, dtype=bool)
|
|
14
|
-
if isinstance(col_pattern, str):
|
|
15
|
-
col_pattern = [col_pattern]
|
|
16
|
-
if isinstance(row_pattern, str):
|
|
17
|
-
row_pattern = [row_pattern]
|
|
18
|
-
|
|
19
|
-
# check for columns that match at least one pattern
|
|
20
|
-
col_matches = np.zeros(df.shape[1], dtype=bool)
|
|
21
|
-
for col in df.columns:
|
|
22
|
-
if any(re.search(pat, col) for pat in col_pattern):
|
|
23
|
-
col_matches[df.columns.get_loc(col)] = True
|
|
24
|
-
|
|
25
|
-
# same with rows
|
|
26
|
-
row_matches = np.zeros(df.shape[0], dtype=bool)
|
|
27
|
-
for idx in df.index:
|
|
28
|
-
if any(re.search(pat, str(idx)) for pat in row_pattern):
|
|
29
|
-
row_matches[df.index.get_loc(idx)] = True
|
|
30
|
-
|
|
31
|
-
# set mask to the intersection of row and column matches
|
|
32
|
-
mask = np.outer(row_matches, col_matches)
|
|
33
|
-
return mask
|
scdesigner/transform/nullify.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
from typing import Union
|
|
2
|
-
from copy import deepcopy
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def nullify(params: dict, id: str, mask: Union[np.array, None] = None) -> dict:
|
|
7
|
-
"""
|
|
8
|
-
# fit a model
|
|
9
|
-
example_sce = anndata.read_h5ad(save_path)
|
|
10
|
-
x = formulaic.model_matrix("~ bs(pseudotime, degree=2)", example_sce.obs)
|
|
11
|
-
x = x.to_numpy()
|
|
12
|
-
y = example_sce.X.todense()
|
|
13
|
-
params = negative_binomial_copula(x, y)
|
|
14
|
-
|
|
15
|
-
# nullify pseudotime for first ten genes
|
|
16
|
-
outcomes = example_sce.var_names[:10]
|
|
17
|
-
mask = anndata_formula_mask(outcomes, ["pseudotime], "~ bs(pseudotime, degree=2)", example_sce)
|
|
18
|
-
null_params = nullify(params, "beta", mask)
|
|
19
|
-
"""
|
|
20
|
-
if mask is None:
|
|
21
|
-
mask = np.ones(params[id].shape, dtype=bool)
|
|
22
|
-
|
|
23
|
-
result = deepcopy(params)
|
|
24
|
-
result[id][mask] = 0
|
|
25
|
-
return result
|
scdesigner/transform/split.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
from copy import deepcopy
|
|
2
|
-
from ..simulators import CompositeGLMSimulator
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def split_glm(simulator, submodel, keys=["group1", "group2"]):
|
|
6
|
-
initial_model = {
|
|
7
|
-
"formula": simulator.formula,
|
|
8
|
-
"simulator": simulator,
|
|
9
|
-
"var_names": None,
|
|
10
|
-
"_fitted": True,
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
# default to the formula in original model
|
|
14
|
-
if not "formula" in submodel:
|
|
15
|
-
submodel["formula"] = simulator.formula
|
|
16
|
-
|
|
17
|
-
# default to the original simulator type, but remove existing parameters
|
|
18
|
-
if not "simulator" in submodel:
|
|
19
|
-
submodel["simulator"] = deepcopy(simulator)
|
|
20
|
-
submodel["simulator"].params = None
|
|
21
|
-
|
|
22
|
-
specification = {keys[0]: initial_model, keys[1]: submodel}
|
|
23
|
-
return CompositeGLMSimulator(specification)
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from copy import deepcopy
|
|
3
|
-
from typing import Union
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def substitute(
|
|
7
|
-
params: dict, id: str, new_value: np.array, mask: Union[np.array, None] = None
|
|
8
|
-
) -> dict:
|
|
9
|
-
if mask is None:
|
|
10
|
-
mask = np.ones(params[id].shape)
|
|
11
|
-
|
|
12
|
-
result = deepcopy(params)
|
|
13
|
-
result[id][mask] = new_value
|
|
14
|
-
return result
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
scdesigner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
scdesigner/data/__init__.py,sha256=FvmtbC-bO1wOgqjsDexbH_oTNzYd4XyxQcdRMPtz-_s,562
|
|
3
|
-
scdesigner/data/formula.py,sha256=q1NrD8FkCQqtFLGVmCjTSkpIpIQPlfYasykDylodIWQ,4479
|
|
4
|
-
scdesigner/data/group.py,sha256=XGh1XTXE3BzkS4HGDVAs056TX-EbCk3xMcdGL7akHw4,4151
|
|
5
|
-
scdesigner/data/sparse.py,sha256=lMp8gI8sq_fUTj3HiAmia0YiCdMRS1QGyyRePmuq3zY,1384
|
|
6
|
-
scdesigner/diagnose/__init__.py,sha256=XRBlc0_ns9Tbwq-aMWMIv1c-tcTOgRFQBywBFV46Gt4,3579
|
|
7
|
-
scdesigner/diagnose/aic_bic.py,sha256=9GmtxdEXbbmCvL__pm7jSvQeeCb8KjwawY3UebahVBs,5022
|
|
8
|
-
scdesigner/diagnose/plot.py,sha256=JP1vLbZVnMs171aOBawKbbCUgGiwIfCGKWrQyNh7Y2s,7225
|
|
9
|
-
scdesigner/estimators/__init__.py,sha256=TDkbc25TvXJp5O4_U2QipM8tmEoJ0HpNG53rlYthsBk,1221
|
|
10
|
-
scdesigner/estimators/bernoulli.py,sha256=vnIETFbHjXErgkKps4ImgeM5W6nta0c-8-ZetYpqK8g,3324
|
|
11
|
-
scdesigner/estimators/gaussian.py,sha256=-CKumYQWw8KbshdCmCdFgpcH5dNbqY-3vNTXCyjJbqc,4311
|
|
12
|
-
scdesigner/estimators/gaussian_copula_factory.py,sha256=yeIx_C2fN2jiotTkJstV4pjQYW4aO6RJ-jMuuXR2HVs,14054
|
|
13
|
-
scdesigner/estimators/glm_factory.py,sha256=tjVlEfJwBPK_Vk4G4P_eTYnBqeI8Y0r5_u_iSYB7qfA,2618
|
|
14
|
-
scdesigner/estimators/negbin.py,sha256=4q_XZOZA8gYHXOOY5Mz17cexs-kzadqB9JlxqWnZ7VQ,5639
|
|
15
|
-
scdesigner/estimators/pnmf.py,sha256=-0WDbwh0uU4V-eyl_0uQ3qLZpT2IuEo9Pa-GEC6vJYI,4781
|
|
16
|
-
scdesigner/estimators/poisson.py,sha256=FMYtkip2NDYmmkyEuO_9hSuM6gtM-AZpPa9GwFrFfD8,4174
|
|
17
|
-
scdesigner/estimators/zero_inflated_negbin.py,sha256=OeMbXf20wzhJUmll2meMXBdLfeHGSya2hikmXNRUKI4,7959
|
|
18
|
-
scdesigner/estimators/zero_inflated_poisson.py,sha256=2lrD2H4QMBxhNbak3gDUHVxPBURLS16IyfN5eCEbq9Q,3364
|
|
19
|
-
scdesigner/format/__init__.py,sha256=PR12wZFvixIqHEd--d1oZkuj6o8tAQx-rgnpUKkr03I,179
|
|
20
|
-
scdesigner/format/format.py,sha256=WLsGnfeM52Mg3fhKHwPx0XbkYJSXfehu2_HmQfUpHdY,521
|
|
21
|
-
scdesigner/format/print.py,sha256=HK3yLQcFw-f5-nSxMy52bD8Mixw_xxAYV2W9K4_ULwg,794
|
|
22
|
-
scdesigner/minimal/__init__.py,sha256=IdMK1a2iiYyJ1gsWoupc2_3wyEu3Udjbm_iC0U724kM,378
|
|
23
|
-
scdesigner/minimal/bernoulli.py,sha256=KvkXiS5aOYIT_L_xymaAWd92ZJyRXZowTcOv_b1RsJo,2304
|
|
24
|
-
scdesigner/minimal/composite.py,sha256=lOafmycSpW5U7Yrmm_NgRJx8Y9GvkuS0BVf8IGFse9c,4536
|
|
25
|
-
scdesigner/minimal/copula.py,sha256=nNnK9Pxrp4-jlYWFY4NBt0hiUXGVHjMQMH5P5EhxLVw,8884
|
|
26
|
-
scdesigner/minimal/formula.py,sha256=VFTadNxn-2elBvCkD_yuyh9O-vDZFiitSqjJkZYJy3Y,879
|
|
27
|
-
scdesigner/minimal/gaussian.py,sha256=CKsluKk_XduYMNNUZTtENzH3wf4iTRGRc4L2nzq7qYw,2351
|
|
28
|
-
scdesigner/minimal/kwargs.py,sha256=a32BLKNBj7ont5fR_uUpppYwel55dpP3fqaPeVbmCKM,979
|
|
29
|
-
scdesigner/minimal/loader.py,sha256=N6bFyRkJGkpi1NIHU9gwfJTbBP_Z51p6tX2ymxML5Fk,7483
|
|
30
|
-
scdesigner/minimal/marginal.py,sha256=5SB1biCpO8PzMb_--mzBdpTHrnbTEt0BMp7188afxd0,5790
|
|
31
|
-
scdesigner/minimal/negbin.py,sha256=NorV0CQ_wmKwTWuWd-Y1mE3gWYmVPHgmjp3pgI86bEI,2563
|
|
32
|
-
scdesigner/minimal/positive_nonnegative_matrix_factorization.py,sha256=oMIC1aqdH4Cgn2pU446lh2-H3axEe-FSC_ZNcgSBrdk,7445
|
|
33
|
-
scdesigner/minimal/scd3.py,sha256=4EKi6Wonc0LRfWjLdvn8HMhbuR-c5kZbmVNLGbJd7mw,3083
|
|
34
|
-
scdesigner/minimal/scd3_instances.py,sha256=vbCbF_SbCqyCUdi_STwV2ufn9ouoEk14_D9g4Wc9O14,1969
|
|
35
|
-
scdesigner/minimal/simulator.py,sha256=DmeT_uXswR9larJ_OuysXUt32BqIVwxS-2yYMk9PGQw,631
|
|
36
|
-
scdesigner/minimal/standard_copula.py,sha256=_NHCKL90eR7oHmgYtZAC9dyZDBKqYj987nrUxYoDAtM,16204
|
|
37
|
-
scdesigner/minimal/transform.py,sha256=j2sj2vHpFi3YdK9sz4Npl05i8UPGBcDU3kT-n72u28s,5459
|
|
38
|
-
scdesigner/minimal/zero_inflated_negbin.py,sha256=npDoyAGWs9n5rl3HwvAUvDG9UDG3DlwRMUGwkpeqvlc,3179
|
|
39
|
-
scdesigner/predictors/__init__.py,sha256=3ycFB7ifR2y-27Kx2GhESyfcyZsJ-6ds6vxgzwrl6ss,462
|
|
40
|
-
scdesigner/predictors/bernoulli.py,sha256=ln7GpOTh7nxCVX0nvARPXw0E-l3SalDwur0iUEz-SWQ,261
|
|
41
|
-
scdesigner/predictors/gaussian.py,sha256=R7zhob4AVExAJYGcV51IvjczmlrG5_QmZSFuE3BS2zk,572
|
|
42
|
-
scdesigner/predictors/negbin.py,sha256=B08J_Y5eXbYc9tHH2PZxxnwYXQUaGKmoaAiB1X2wqQw,669
|
|
43
|
-
scdesigner/predictors/poisson.py,sha256=FWrUbzGEbsRmGCV36RjE2AVmbQCgvFXen7yFB6E6JCg,359
|
|
44
|
-
scdesigner/predictors/zero_inflated_negbin.py,sha256=t-lIFsNfzMKdPJFbj8c1B26E6M4_atrCZeUNK4Ujp2s,792
|
|
45
|
-
scdesigner/predictors/zero_inflated_poisson.py,sha256=Krr4r-9vkh7SVy3d45v226NVUYia7AuY6GOEeulMRXI,577
|
|
46
|
-
scdesigner/samplers/__init__.py,sha256=ns3tA_Q7jYIPgnYPfItvHCUpBV9fDimUg0dEHcNB7wE,775
|
|
47
|
-
scdesigner/samplers/bernoulli.py,sha256=1GLOU69_D2sxcedmxC2dIj1MQbmX5D8i0ccvyxjTcR8,791
|
|
48
|
-
scdesigner/samplers/gaussian.py,sha256=H8DwD4XzzALFguxB1uiwUsUURF_G1B6d2j4r0up-srU,943
|
|
49
|
-
scdesigner/samplers/glm_factory.py,sha256=aQvFTgBKAk7kV-SnFJ61aSHQGwemxW9Hl4ZXBmwGJug,3607
|
|
50
|
-
scdesigner/samplers/negbin.py,sha256=d9oWIDxfXl_kNOPCjYUvtVie3uQmBN6yHfxHbrQiX-I,930
|
|
51
|
-
scdesigner/samplers/poisson.py,sha256=_TxotqGuVDFRXJOIiBGqRE_qJK0XkXn96clkCDCRV1A,789
|
|
52
|
-
scdesigner/samplers/zero_inflated_negbin.py,sha256=Z9kUSEQL8Z-lf48wWBLAuz74FcK7mjsKmeIzyBE9ov4,1287
|
|
53
|
-
scdesigner/samplers/zero_inflated_poisson.py,sha256=rUvUZN9_nUfE7mcbnAA6foKv42owwOdxaEPLyuimxSQ,425
|
|
54
|
-
scdesigner/simulators/__init__.py,sha256=D8xA3aR2FFVMqkYttJPT2FqdKN_PeyweMNVQk9V0w-E,976
|
|
55
|
-
scdesigner/simulators/composite_regressor.py,sha256=eKwTnAOT5l2aIZ0X7AMchz4EEdzElr2NkfugIaAVt7o,2454
|
|
56
|
-
scdesigner/simulators/glm_simulator.py,sha256=c01-5Fo2vvihaof5w_eXCWmMnHyh5TYbj9sIwc7H2tw,5410
|
|
57
|
-
scdesigner/simulators/pnmf_regression.py,sha256=B_fMK7Q9D4q31HT77384EKZEST7XXVNC3v4MNP41Vw0,2214
|
|
58
|
-
scdesigner/transform/__init__.py,sha256=cuLIP0_tocIA3dupO7npH2mLFpL1ApLZlLWPudzPt6M,236
|
|
59
|
-
scdesigner/transform/amplify.py,sha256=aNxpuyoDpXI4xK5FyCaGKqLqUh4SxFCZhwZeW8XonEQ,351
|
|
60
|
-
scdesigner/transform/mask.py,sha256=z-NQ6xcnEzCFWvlfCRhQOrE-TWvtAwQ6Cs8KBPahBSk,1032
|
|
61
|
-
scdesigner/transform/nullify.py,sha256=pEtYNDVT2Z_BmVc5CKl3CoxB37KOvqMReoQGnvFYMKE,792
|
|
62
|
-
scdesigner/transform/split.py,sha256=AK3mU52DHSagdyW-d79tsZ12zMKc5xMF_MockUUvciE,741
|
|
63
|
-
scdesigner/transform/substitute.py,sha256=pozV7IVJLyUzJVKeaSX86v0bl9foSFIAQpZ0oC18xak,326
|
|
64
|
-
scdesigner-0.0.5.dist-info/METADATA,sha256=SykpXsuJatehhOj2nFtqVrW5o82d5JEij4r6ndCEf_8,741
|
|
65
|
-
scdesigner-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
66
|
-
scdesigner-0.0.5.dist-info/RECORD,,
|