scdesigner-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scdesigner might be problematic.

Files changed (66)
  1. scdesigner/__init__.py +0 -0
  2. scdesigner/data/__init__.py +16 -0
  3. scdesigner/data/formula.py +137 -0
  4. scdesigner/data/group.py +123 -0
  5. scdesigner/data/sparse.py +39 -0
  6. scdesigner/diagnose/__init__.py +65 -0
  7. scdesigner/diagnose/aic_bic.py +119 -0
  8. scdesigner/diagnose/plot.py +242 -0
  9. scdesigner/estimators/__init__.py +27 -0
  10. scdesigner/estimators/bernoulli.py +85 -0
  11. scdesigner/estimators/gaussian.py +121 -0
  12. scdesigner/estimators/gaussian_copula_factory.py +152 -0
  13. scdesigner/estimators/glm_factory.py +75 -0
  14. scdesigner/estimators/negbin.py +129 -0
  15. scdesigner/estimators/pnmf.py +160 -0
  16. scdesigner/estimators/poisson.py +100 -0
  17. scdesigner/estimators/zero_inflated_negbin.py +195 -0
  18. scdesigner/estimators/zero_inflated_poisson.py +85 -0
  19. scdesigner/format/__init__.py +4 -0
  20. scdesigner/format/format.py +20 -0
  21. scdesigner/format/print.py +30 -0
  22. scdesigner/minimal/__init__.py +17 -0
  23. scdesigner/minimal/bernoulli.py +61 -0
  24. scdesigner/minimal/composite.py +119 -0
  25. scdesigner/minimal/copula.py +33 -0
  26. scdesigner/minimal/formula.py +23 -0
  27. scdesigner/minimal/gaussian.py +65 -0
  28. scdesigner/minimal/kwargs.py +24 -0
  29. scdesigner/minimal/loader.py +166 -0
  30. scdesigner/minimal/marginal.py +140 -0
  31. scdesigner/minimal/negbin.py +73 -0
  32. scdesigner/minimal/positive_nonnegative_matrix_factorization.py +231 -0
  33. scdesigner/minimal/scd3.py +95 -0
  34. scdesigner/minimal/scd3_instances.py +50 -0
  35. scdesigner/minimal/simulator.py +25 -0
  36. scdesigner/minimal/standard_covariance.py +124 -0
  37. scdesigner/minimal/transform.py +145 -0
  38. scdesigner/minimal/zero_inflated_negbin.py +86 -0
  39. scdesigner/predictors/__init__.py +15 -0
  40. scdesigner/predictors/bernoulli.py +9 -0
  41. scdesigner/predictors/gaussian.py +16 -0
  42. scdesigner/predictors/negbin.py +17 -0
  43. scdesigner/predictors/poisson.py +12 -0
  44. scdesigner/predictors/zero_inflated_negbin.py +18 -0
  45. scdesigner/predictors/zero_inflated_poisson.py +18 -0
  46. scdesigner/samplers/__init__.py +23 -0
  47. scdesigner/samplers/bernoulli.py +27 -0
  48. scdesigner/samplers/gaussian.py +25 -0
  49. scdesigner/samplers/glm_factory.py +41 -0
  50. scdesigner/samplers/negbin.py +25 -0
  51. scdesigner/samplers/poisson.py +25 -0
  52. scdesigner/samplers/zero_inflated_negbin.py +40 -0
  53. scdesigner/samplers/zero_inflated_poisson.py +16 -0
  54. scdesigner/simulators/__init__.py +31 -0
  55. scdesigner/simulators/composite_regressor.py +72 -0
  56. scdesigner/simulators/glm_simulator.py +167 -0
  57. scdesigner/simulators/pnmf_regression.py +61 -0
  58. scdesigner/transform/__init__.py +7 -0
  59. scdesigner/transform/amplify.py +14 -0
  60. scdesigner/transform/mask.py +33 -0
  61. scdesigner/transform/nullify.py +25 -0
  62. scdesigner/transform/split.py +23 -0
  63. scdesigner/transform/substitute.py +14 -0
  64. scdesigner-0.0.1.dist-info/METADATA +23 -0
  65. scdesigner-0.0.1.dist-info/RECORD +66 -0
  66. scdesigner-0.0.1.dist-info/WHEEL +4 -0
scdesigner/minimal/transform.py
@@ -0,0 +1,145 @@
+ from typing import Union, Sequence
+ import numpy as np
+ import re
+ import torch
+ import copy
+
+
+ def nullify(sim, row_pattern: str, col_pattern: str, param: str):
+     """Nullify marginal parameters
+
+     Zero out entries matching (row_pattern, col_pattern) for the marginal `param`.
+     """
+     sim = copy.deepcopy(sim)
+     df = sim.parameters["marginal"][param]
+     matches = data_frame_mask(df, row_pattern, col_pattern)
+     mask = (~matches).astype(float)
+     mat = sim.marginal.predict.coefs[param].detach().cpu().numpy() * mask
+     _update_marginal_param(sim, param, mat)
+     return sim
+
+
+ def amplify(sim, factor: float, row_pattern: str, col_pattern: str, param: str):
+     """Multiply selected marginal entries by factor."""
+     sim = copy.deepcopy(sim)
+     df = sim.parameters["marginal"][param]
+     matches = data_frame_mask(df, row_pattern, col_pattern).astype(float)
+     mask = factor * matches + np.ones_like(matches)
+     mat = sim.marginal.predict.coefs[param].detach().cpu().numpy() * mask
+     _update_marginal_param(sim, param, mat)
+     return sim
+
+
+ def decorrelate(sim, row_pattern: str, col_pattern: str, group: Union[str, None] = None):
+     """Zero out selected off-diagonal entries of a covariance."""
+     sim = copy.deepcopy(sim)
+     def _apply_to_df(df):
+         m1 = data_frame_mask(df, ".", col_pattern)
+         m2 = data_frame_mask(df, row_pattern, ".")
+         mask = (m1 | m2)
+         np.fill_diagonal(mask, False)
+         df.values[mask] = 0
+
+     cov = sim.parameters["copula"]
+     _apply_to_groups(cov, group, _apply_to_df)
+     return sim
+
+
+ def correlate(sim, factor: float, row_pattern: str, col_pattern: str, group: Union[str, None] = None):
+     """Multiply selected off-diagonal entries by factor."""
+     sim = copy.deepcopy(sim)
+     def _apply_to_df(df):
+         m1 = data_frame_mask(df, ".", col_pattern)
+         m2 = data_frame_mask(df, row_pattern, ".")
+         mask = (m1 | m2)
+         np.fill_diagonal(mask, False)
+         df.values[mask] = df.values[mask] * factor
+
+     cov = sim.parameters["copula"]
+     _apply_to_groups(cov, group, _apply_to_df)
+     return sim
+
+
+ def replace_param(sim, path: Sequence[str], new_param):
+     """Substitute a new parameter for an old one.
+
+     Use the path to the parameter starting from sim.parameters to identify the
+     parameter to transform. Examples: ['marginal','mean'] or
+     ['copula','group_name']
+     """
+     sim = copy.deepcopy(sim)
+     if path[0] == "marginal":
+         param = path[1]
+         mat = np.asarray(new_param)
+         _update_marginal_param(sim, param, mat)
+
+     if path[0] == "copula":
+         key = path[1]
+         cov = sim.parameters["copula"]
+         if isinstance(cov, dict):
+             cov[key] = new_param
+         else:
+             sim.parameters["copula"] = new_param
+
+     return sim
+
+
+ ###############################################################################
+ ## Helper functions used throughout
+ ###############################################################################
+
+ def str_match(string: str, string_list: list) -> bool:
+     for l in string_list:
+         if l in string:
+             return True
+     return False
+
+
+ def data_frame_mask(df, row_pattern=".", col_pattern=".") -> np.array:
+     """Return a boolean mask for a pandas DataFrame where rows/cols match regex patterns.
+
+     The returned mask has shape (n_rows, n_cols) and is True where the
+     intersection of matched rows and matched columns occurs.
+     """
+     mask = np.zeros(df.shape, dtype=bool)
+     if isinstance(col_pattern, str):
+         col_pattern = [col_pattern]
+     if isinstance(row_pattern, str):
+         row_pattern = [row_pattern]
+
+     # check for columns that match at least one pattern
+     col_matches = np.zeros(df.shape[1], dtype=bool)
+     for col in df.columns:
+         if any(re.search(pat, col) for pat in col_pattern):
+             col_matches[df.columns.get_loc(col)] = True
+
+     # same with rows
+     row_matches = np.zeros(df.shape[0], dtype=bool)
+     for idx in df.index:
+         if any(re.search(pat, str(idx)) for pat in row_pattern):
+             row_matches[df.index.get_loc(idx)] = True
+
+     # set mask to the intersection of row and column matches
+     mask = np.outer(row_matches, col_matches)
+     return mask
+
+
+ def _apply_to_groups(cov_obj, grp, f):
+     """Apply f to either all group arrays or a single group's array."""
+     if isinstance(cov_obj, dict):
+         if grp is None:
+             for k in list(cov_obj.keys()):
+                 f(cov_obj[k])
+         else:
+             f(cov_obj[grp])
+     else:
+         f(cov_obj)
+
+
+ def _update_marginal_param(sim, param: str, mat: np.ndarray):
+     """Update the torch Parameter for a marginal `param`"""
+     tensor = sim.marginal.predict.coefs[param]
+     with torch.no_grad():
+         t = torch.from_numpy(np.asarray(mat)).to(dtype=tensor.dtype, device=tensor.device)
+         tensor.copy_(t)
+     sim.parameters["marginal"][param].values[:] = tensor.detach().cpu().numpy()
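For orientation, here is a minimal standalone sketch of the regex masking that `nullify` and `amplify` rely on; the toy coefficient table, covariate names, and gene names are hypothetical and not part of the package.

    import re
    import numpy as np
    import pandas as pd

    # Hypothetical coefficient table: rows are covariates, columns are genes.
    coefs = pd.DataFrame(
        np.ones((2, 3)),
        index=["Intercept", "condition[T.treated]"],
        columns=["GeneA", "GeneB", "GeneC"],
    )

    # Same logic as data_frame_mask: rows matching "condition" x columns matching "Gene[AB]".
    row_matches = np.array([bool(re.search("condition", str(i))) for i in coefs.index])
    col_matches = np.array([bool(re.search("Gene[AB]", c)) for c in coefs.columns])
    mask = np.outer(row_matches, col_matches)

    # nullify-style transform: zero out matched entries, keep everything else.
    nullified = coefs.values * (~mask).astype(float)
    print(nullified)  # condition effects on GeneA and GeneB are now 0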
scdesigner/minimal/zero_inflated_negbin.py
@@ -0,0 +1,86 @@
+ from .formula import standardize_formula
+ from .marginal import GLMPredictor, Marginal
+ from .loader import _to_numpy
+ from typing import Union, Dict, Optional
+ import torch
+ import numpy as np
+ from scipy.stats import nbinom, bernoulli
+
+ class ZeroInflatedNegBin(Marginal):
+     """Zero-inflated negative-binomial marginal estimator"""
+     def __init__(self, formula: Union[Dict, str]):
+         formula = standardize_formula(formula, allowed_keys=['mean', 'dispersion', 'zero_inflation'])
+         super().__init__(formula)
+
+     def setup_optimizer(
+         self,
+         optimizer_class: Optional[callable] = torch.optim.Adam,
+         **optimizer_kwargs,
+     ):
+         if self.loader is None:
+             raise RuntimeError("self.loader is not set (call setup_data first)")
+
+         link_funs = {
+             "mean": torch.exp,
+             "dispersion": torch.exp,
+             "zero_inflation": torch.sigmoid,
+         }
+         nll = lambda batch: -self.likelihood(batch).sum()
+         self.predict = GLMPredictor(
+             n_outcomes=self.n_outcomes,
+             feature_dims=self.feature_dims,
+             link_fns=link_funs,
+             loss_fn=nll,
+             optimizer_class=optimizer_class,
+             optimizer_kwargs=optimizer_kwargs
+         )
+
+     def likelihood(self, batch):
+         """Compute the log-likelihood (negated and summed by the optimizer loss)"""
+         y, x = batch
+         params = self.predict(x)
+         mu = params.get("mean")
+         r = params.get("dispersion")
+         pi = params.get("zero_inflation")
+
+         # negative binomial component
+         negbin_loglikelihood = (
+             torch.lgamma(y + r)
+             - torch.lgamma(r)
+             - torch.lgamma(y + 1)
+             + r * torch.log(r)
+             + y * torch.log(mu)
+             - (r + y) * torch.log(r + mu)
+         )
+
+         # return the mixture, with an offset to prevent log(0)
+         return torch.log(pi * (y == 0) + (1 - pi) * torch.exp(negbin_loglikelihood) + 1e-10)
+
+     def invert(self, u: torch.Tensor, x: Dict[str, torch.Tensor]):
+         """Invert pseudo-observations."""
+         mu, r, pi, u = self._local_params(x, u)
+         y = nbinom(n=r, p=r / (r + mu)).ppf(u)
+         delta = bernoulli(1 - pi).ppf(u)
+         return torch.from_numpy(y * delta).float()
+
+     def uniformize(self, y: torch.Tensor, x: Dict[str, torch.Tensor], epsilon=1e-6):
+         """Return uniformized pseudo-observations for counts y given covariates x."""
+         # cdf values using scipy's parameterization
+         mu, r, pi, y = self._local_params(x, y)
+         nb_distn = nbinom(n=r, p=r / (r + mu))
+         u1 = pi + (1 - pi) * nb_distn.cdf(y)
+         u2 = np.where(y > 0, pi + (1 - pi) * nb_distn.cdf(y-1), 0)
+
+         # randomize within discrete mass to get uniform(0,1)
+         v = np.random.uniform(size=y.shape)
+         u = np.clip(v * u1 + (1 - v) * u2, epsilon, 1 - epsilon)
+         return torch.from_numpy(u).float()
+
+     def _local_params(self, x, y=None):
+         params = self.predict(x)
+         mu = params.get('mean')
+         r = params.get('dispersion')
+         pi = params.get('zero_inflation')
+         if y is None:
+             return _to_numpy(mu, pi, r)
+         return _to_numpy(mu, r, pi, y)
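The `uniformize` method above applies a randomized probability integral transform: each discrete count is mapped to a point inside its own probability mass so the result is continuously uniform. A self-contained sketch of the same trick for a plain negative binomial, with purely illustrative parameter values:

    import numpy as np
    from scipy.stats import nbinom, kstest

    mu, r = 5.0, 2.0                       # illustrative NB mean and dispersion
    nb = nbinom(n=r, p=r / (r + mu))

    # Draw counts, then randomize within each discrete mass:
    # u = v * F(y) + (1 - v) * F(y - 1), with v ~ Uniform(0, 1).
    y = nb.rvs(size=5000, random_state=0)
    v = np.random.default_rng(0).uniform(size=y.shape)
    u = v * nb.cdf(y) + (1 - v) * np.where(y > 0, nb.cdf(y - 1), 0.0)

    # If the marginal model is right, u is approximately Uniform(0, 1).
    print(kstest(u, "uniform").pvalue)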
scdesigner/predictors/__init__.py
@@ -0,0 +1,15 @@
+ from .bernoulli import bernoulli_predict
+ from .negbin import negbin_predict
+ from .poisson import poisson_predict
+ from .gaussian import gaussian_predict
+ from .zero_inflated_negbin import zero_inflated_negbin_predict
+ from .zero_inflated_poisson import zero_inflated_poisson_predict
+
+ __all__ = [
+     "bernoulli_predict",
+     "gaussian_predict",
+     "negbin_predict",
+     "poisson_predict",
+     "zero_inflated_negbin_predict",
+     "zero_inflated_poisson_predict",
+ ]
scdesigner/predictors/bernoulli.py
@@ -0,0 +1,9 @@
+ import numpy as np
+ import pandas as pd
+ from ..format import format_matrix
+
+
+ def bernoulli_predict(parameters: dict, obs: pd.DataFrame, formula: str):
+     x = format_matrix(obs, formula)
+     theta = np.exp(x @ parameters["coef_mean"])
+     return {"mean": theta}
scdesigner/predictors/gaussian.py
@@ -0,0 +1,16 @@
+ import numpy as np
+ import pandas as pd
+ from ..format import format_matrix
+ from typing import Union
+
+ def gaussian_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
+     # Standardize formula to dictionary format
+     if isinstance(formula, str):
+         formula = {'mean': formula, 'sdev': '~ 1'}
+
+     x_mean = format_matrix(obs, formula["mean"])
+     x_dispersion = format_matrix(obs, formula["sdev"])
+
+     sigma = np.exp(x_dispersion @ parameters["coef_sdev"])
+     mu = x_mean @ parameters["coef_mean"]
+     return {"mean": mu, "sdev": sigma}
scdesigner/predictors/negbin.py
@@ -0,0 +1,17 @@
+ import numpy as np
+ import pandas as pd
+ from ..format import format_matrix
+ from typing import Union
+
+ def negbin_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
+     # Standardize formula to dictionary format
+     if isinstance(formula, str):
+         formula = {'mean': formula, 'dispersion': '~ 1'}
+
+     x_mean = format_matrix(obs, formula["mean"])
+     x_dispersion = format_matrix(obs, formula["dispersion"])
+
+     r = np.exp(x_dispersion @ parameters["coef_dispersion"])
+     mu = np.exp(x_mean @ parameters["coef_mean"])
+     # r and mu are still dataframes with column names being the gene names
+     return {"mean": mu, "dispersion": r}
scdesigner/predictors/poisson.py
@@ -0,0 +1,12 @@
+ import numpy as np
+ import pandas as pd
+ from ..format import format_matrix
+ from typing import Union
+
+ def poisson_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
+     if isinstance(formula, dict):
+         formula = formula['mean']
+     x = format_matrix(obs, formula)
+     mu = np.exp(x @ parameters["coef_mean"])
+     return {"mean": mu}
+
scdesigner/predictors/zero_inflated_negbin.py
@@ -0,0 +1,18 @@
+ import numpy as np
+ import pandas as pd
+ from ..format import format_matrix
+ from scipy.special import expit
+ from typing import Union
+
+ def zero_inflated_negbin_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
+     if isinstance(formula, str):
+         formula = {"mean": formula, "dispersion": "~ 1", "zero_inflation": "~ 1"}
+     x_mean = format_matrix(obs, formula["mean"])
+     x_dispersion = format_matrix(obs, formula["dispersion"])
+     x_zero_inflation = format_matrix(obs, formula["zero_inflation"])
+     r, mu, pi = (
+         np.exp(x_dispersion @ parameters["coef_dispersion"]),
+         np.exp(x_mean @ parameters["coef_mean"]),
+         expit(x_zero_inflation @ parameters["coef_zero_inflation"]),
+     )
+     return {"mean": mu, "dispersion": r, "zero_inflation": pi}
scdesigner/predictors/zero_inflated_poisson.py
@@ -0,0 +1,18 @@
+ from ..format import format_matrix
+ from typing import Union
+ import numpy as np
+ import pandas as pd
+
+
+ def zero_inflated_poisson_predict(parameters: dict, obs: pd.DataFrame, formula: Union[str, dict]):
+     if isinstance(formula, str):
+         formula = {'mean': formula, 'zero_inflation': '~ 1'}
+     mu, pi = (
+         np.exp(format_matrix(obs, formula['mean']) @ parameters["coef_mean"]),
+         sigmoid(format_matrix(obs, formula['zero_inflation']) @ parameters["coef_zero_inflation"]),
+     )
+     return {"mean": mu, "zero_inflation": pi}
+
+
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
scdesigner/samplers/__init__.py
@@ -0,0 +1,23 @@
+ from .negbin import negbin_sample, negbin_copula_sample
+ from .poisson import poisson_sample, poisson_copula_sample
+ from .bernoulli import bernoulli_sample, bernoulli_copula_sample
+ from .gaussian import gaussian_regression_sample, gaussian_copula_sample
+ from .zero_inflated_negbin import (
+     zero_inflated_negbin_sample,
+     zero_inflated_negbin_copula_sample,
+ )
+ from .zero_inflated_poisson import zero_inflated_poisson_sample
+
+ __all__ = [
+     "negbin_sample",
+     "negbin_copula_sample",
+     "poisson_sample",
+     "poisson_copula_sample",
+     "bernoulli_sample",
+     "bernoulli_copula_sample",
+     "gaussian_regression_sample",
+     "gaussian_copula_sample",
+     "zero_inflated_negbin_sample",
+     "zero_inflated_negbin_copula_sample",
+     "zero_inflated_poisson_sample",
+ ]
scdesigner/samplers/bernoulli.py
@@ -0,0 +1,27 @@
+ from . import glm_factory as glm
+ from scipy.stats import bernoulli
+ from typing import Union
+ import numpy as np
+
+
+ def bernoulli_regression_sample_array(local_parameters: dict) -> np.array:
+     theta = local_parameters["mean"]
+     return bernoulli(theta).rvs()
+
+
+ def bernoulli_copula_sample_array(
+     local_parameters: dict, covariance: Union[dict, np.array], groups: dict
+ ) -> np.array:
+     # initialize uniformized gaussian samples
+     N, G = local_parameters["mean"].shape
+     u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
+
+     theta = local_parameters["mean"]
+     return bernoulli(theta).ppf(u)
+
+
+ bernoulli_sample = glm.glm_sample_factory(bernoulli_regression_sample_array)
+
+ bernoulli_copula_sample = glm.gaussian_copula_sample_factory(
+     bernoulli_copula_sample_array
+ )
scdesigner/samplers/gaussian.py
@@ -0,0 +1,25 @@
+ from scipy.stats import norm
+ from . import glm_factory as glm
+ from typing import Union
+ import numpy as np
+
+
+ def gaussian_regression_sample_array(local_parameters: dict) -> np.array:
+     sigma, mu = local_parameters["sdev"], local_parameters["mean"]  # dataframes of shape (n, g)
+     return norm(loc=mu, scale=sigma).rvs()
+
+
+ def gaussian_copula_sample_array(
+     local_parameters: dict, covariance: Union[dict, np.array], groups: dict
+ ) -> np.array:
+     # initialize uniformized gaussian samples
+     N, G = local_parameters["mean"].shape
+     u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
+
+     # transform the correlated uniforms to Gaussian space
+     sigma, mu = local_parameters["sdev"], local_parameters["mean"]
+     return norm(loc=mu, scale=sigma).ppf(u)
+
+
+ gaussian_regression_sample = glm.glm_sample_factory(gaussian_regression_sample_array)
+ gaussian_copula_sample = glm.gaussian_copula_sample_factory(gaussian_copula_sample_array)
scdesigner/samplers/glm_factory.py
@@ -0,0 +1,41 @@
+ import numpy as np
+ import pandas as pd
+ import anndata as ad
+ from typing import Union
+ from scipy.stats import norm
+
+
+ def glm_sample_factory(sample_array):
+     def sampler(local_parameters: dict, obs: pd.DataFrame) -> ad.AnnData:
+         samples = sample_array(local_parameters)
+         result = ad.AnnData(X=samples, obs=obs)
+         result.var_names = local_parameters["mean"].columns
+         return result
+     return sampler
+
+ def gaussian_copula_pseudo_obs(N, G, sigma, groups):
+     u = np.zeros((N, G))
+
+     # cycle across groups
+     for group, ix in groups.items():
+         if type(sigma) is not dict:
+             sigma = {group: sigma}
+
+         z = np.random.multivariate_normal(
+             mean=np.zeros(G), cov=sigma[group], size=len(ix)
+         )
+         normal_distn = norm(0, np.diag(sigma[group] ** 0.5))
+         u[ix] = normal_distn.cdf(z)
+     return u
+
+
+ def gaussian_copula_sample_factory(copula_sample_array):
+     def sampler(
+         local_parameters: dict, covariance: Union[dict, np.array], groups: dict, obs: pd.DataFrame
+     ) -> ad.AnnData:
+         samples = copula_sample_array(local_parameters, covariance, groups)
+         result = ad.AnnData(X=samples, obs=obs)
+         result.var_names = local_parameters["mean"].columns
+         return result
+     return sampler
+
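A toy illustration of the pseudo-observation step in `gaussian_copula_pseudo_obs`: correlated normals are mapped to uniforms through their marginal CDFs and then inverted through a count marginal (a Poisson here for brevity). The covariance matrix and Poisson means below are made up for the example.

    import numpy as np
    from scipy.stats import norm, poisson

    rng = np.random.default_rng(1)
    G = 3
    cov = np.array([[1.0, 0.8, 0.0],
                    [0.8, 1.0, 0.0],
                    [0.0, 0.0, 1.0]])

    # Correlated standard normals -> uniforms via the marginal normal CDFs.
    z = rng.multivariate_normal(mean=np.zeros(G), cov=cov, size=1000)
    u = norm(0, np.sqrt(np.diag(cov))).cdf(z)

    # Invert through a count marginal; the copula correlation survives the transform.
    counts = poisson(mu=np.array([2.0, 5.0, 10.0])).ppf(u)
    print(np.corrcoef(counts, rowvar=False).round(2))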
scdesigner/samplers/negbin.py
@@ -0,0 +1,25 @@
+ from scipy.stats import nbinom
+ from . import glm_factory as glm
+ from typing import Union
+ import numpy as np
+
+
+ def negbin_regression_sample_array(local_parameters: dict) -> np.array:
+     r, mu = local_parameters["dispersion"], local_parameters["mean"]  # dataframes of shape (n, g)
+     return nbinom(n=r, p=r / (r + mu)).rvs()
+
+
+ def negbin_copula_sample_array(
+     local_parameters: dict, covariance: Union[dict, np.array], groups: dict
+ ) -> np.array:
+     # initialize uniformized gaussian samples
+     N, G = local_parameters["mean"].shape
+     u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
+
+     # transform the correlated uniforms to NB space
+     r, mu = local_parameters["dispersion"], local_parameters["mean"]
+     return nbinom(n=r, p=r / (r + mu)).ppf(u)
+
+
+ negbin_sample = glm.glm_sample_factory(negbin_regression_sample_array)
+ negbin_copula_sample = glm.gaussian_copula_sample_factory(negbin_copula_sample_array)
scdesigner/samplers/poisson.py
@@ -0,0 +1,25 @@
+ from . import glm_factory as glm
+ from scipy.stats import poisson
+ from typing import Union
+ import numpy as np
+
+
+ def poisson_regression_sample_array(local_parameters: dict) -> np.array:
+     mu = local_parameters["mean"]
+     return poisson(mu).rvs()
+
+
+ def poisson_copula_sample_array(
+     local_parameters: dict, covariance: Union[dict, np.array], groups: dict
+ ) -> np.array:
+     # initialize uniformized gaussian samples
+     N, G = local_parameters["mean"].shape
+     u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
+
+     # invert using poisson margins
+     mu = local_parameters["mean"]
+     return poisson(mu).ppf(u)
+
+
+ poisson_sample = glm.glm_sample_factory(poisson_regression_sample_array)
+ poisson_copula_sample = glm.gaussian_copula_sample_factory(poisson_copula_sample_array)
scdesigner/samplers/zero_inflated_negbin.py
@@ -0,0 +1,40 @@
+ from scipy.stats import nbinom, bernoulli
+ from . import glm_factory as glm
+ from typing import Union
+ import numpy as np
+
+
+ def zero_inflated_negbin_sample_array(local_parameters: dict) -> np.array:
+     mu, r, pi = (
+         local_parameters["mean"],
+         local_parameters["dispersion"],
+         local_parameters["zero_inflation"],
+     )
+     return nbinom(n=r, p=r / (r + mu)).rvs() * bernoulli(1 - pi).rvs()
+
+
+ def zero_inflated_negbin_copula_sample_array(
+     local_parameters: dict, covariance: Union[dict, np.array], groups: dict
+ ) -> np.array:
+     # initialize uniformized gaussian samples
+     N, G = local_parameters["mean"].shape
+     u = glm.gaussian_copula_pseudo_obs(N, G, covariance, groups)
+
+     # get zero inflated NB parameters
+     mu, r, pi = (
+         local_parameters["mean"],
+         local_parameters["dispersion"],
+         local_parameters["zero_inflation"],
+     )
+
+     # zero inflate after first simulating from NB
+     positive_part = nbinom(n=r, p=r / (r + mu)).ppf(u)
+     zero_inflation = bernoulli(1 - pi).ppf(u)
+     return zero_inflation * positive_part
+
+
+ zero_inflated_negbin_sample = glm.glm_sample_factory(zero_inflated_negbin_sample_array)
+
+ zero_inflated_negbin_copula_sample = glm.gaussian_copula_sample_factory(
+     zero_inflated_negbin_copula_sample_array
+ )
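Note that the copula sampler above drives both the negative-binomial quantile and the Bernoulli zero-inflation indicator with the same uniforms `u`. A standalone sketch of that inversion step, with purely illustrative parameter values:

    import numpy as np
    from scipy.stats import nbinom, bernoulli

    rng = np.random.default_rng(2)
    mu, r, pi = 4.0, 1.5, 0.3          # illustrative ZINB parameters
    u = rng.uniform(size=10_000)       # stands in for copula pseudo-observations

    # Same inversion as above: NB quantile times a Bernoulli(1 - pi) indicator,
    # both driven by the shared uniforms u.
    positive_part = nbinom(n=r, p=r / (r + mu)).ppf(u)
    keep = bernoulli(1 - pi).ppf(u)    # 0 when u <= pi, 1 otherwise
    samples = keep * positive_part

    print((samples == 0).mean())       # zero fraction reflects both pi and the NB mass at 0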
scdesigner/samplers/zero_inflated_poisson.py
@@ -0,0 +1,16 @@
+ from scipy.stats import poisson, bernoulli
+ from . import glm_factory as glm
+ import numpy as np
+
+
+ def zero_inflated_poisson_sample_array(local_parameters: dict) -> np.array:
+     mu, pi = (
+         local_parameters["mean"],
+         local_parameters["zero_inflation"],
+     )
+     return poisson(mu).rvs() * bernoulli(1 - pi).rvs()
+
+
+ zero_inflated_poisson_sample = glm.glm_sample_factory(
+     zero_inflated_poisson_sample_array
+ )
scdesigner/simulators/__init__.py
@@ -0,0 +1,31 @@
+ from .composite_regressor import CompositeGLMSimulator
+ from .glm_simulator import (
+     BernoulliCopulaSimulator,
+     BernoulliRegressionSimulator,
+     NegBinCopulaSimulator,
+     NegBinRegressionSimulator,
+     PoissonCopulaSimulator,
+     PoissonRegressionSimulator,
+     GaussianRegressionSimulator,
+     GaussianCopulaSimulator,
+     ZeroInflatedNegBinCopulaSimulator,
+     ZeroInflatedNegBinRegressionSimulator,
+     ZeroInflatedPoissonRegressionSimulator,
+ )
+ from .pnmf_regression import PNMFRegressionSimulator
+
+ __all__ = [
+     "BernoulliCopulaSimulator",
+     "BernoulliRegressionSimulator",
+     "CompositeGLMSimulator",
+     "GaussianRegressionSimulator",
+     "GaussianCopulaSimulator",
+     "NegBinCopulaSimulator",
+     "NegBinRegressionSimulator",
+     "PNMFRegressionSimulator",
+     "PoissonCopulaSimulator",
+     "PoissonRegressionSimulator",
+     "ZeroInflatedNegBinCopulaSimulator",
+     "ZeroInflatedNegBinRegressionSimulator",
+     "ZeroInflatedPoissonRegressionSimulator",
+ ]