scdesigner 0.0.5__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. scdesigner/base/__init__.py +8 -0
  2. scdesigner/base/copula.py +416 -0
  3. scdesigner/base/marginal.py +391 -0
  4. scdesigner/base/simulator.py +59 -0
  5. scdesigner/copulas/__init__.py +8 -0
  6. scdesigner/copulas/standard_copula.py +645 -0
  7. scdesigner/datasets/__init__.py +5 -0
  8. scdesigner/datasets/pancreas.py +39 -0
  9. scdesigner/distributions/__init__.py +19 -0
  10. scdesigner/{minimal → distributions}/bernoulli.py +42 -14
  11. scdesigner/distributions/gaussian.py +114 -0
  12. scdesigner/distributions/negbin.py +121 -0
  13. scdesigner/distributions/negbin_irls.py +72 -0
  14. scdesigner/distributions/negbin_irls_funs.py +456 -0
  15. scdesigner/distributions/poisson.py +88 -0
  16. scdesigner/{minimal → distributions}/zero_inflated_negbin.py +39 -10
  17. scdesigner/distributions/zero_inflated_poisson.py +103 -0
  18. scdesigner/simulators/__init__.py +24 -28
  19. scdesigner/simulators/composite.py +239 -0
  20. scdesigner/simulators/positive_nonnegative_matrix_factorization.py +477 -0
  21. scdesigner/simulators/scd3.py +486 -0
  22. scdesigner/transform/__init__.py +8 -6
  23. scdesigner/{minimal → transform}/transform.py +1 -1
  24. scdesigner/{minimal → utils}/kwargs.py +4 -1
  25. {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/METADATA +1 -1
  26. scdesigner-0.0.10.dist-info/RECORD +28 -0
  27. {scdesigner-0.0.5.dist-info → scdesigner-0.0.10.dist-info}/WHEEL +1 -1
  28. scdesigner/data/__init__.py +0 -16
  29. scdesigner/data/formula.py +0 -137
  30. scdesigner/data/group.py +0 -123
  31. scdesigner/data/sparse.py +0 -39
  32. scdesigner/diagnose/__init__.py +0 -65
  33. scdesigner/diagnose/aic_bic.py +0 -119
  34. scdesigner/diagnose/plot.py +0 -242
  35. scdesigner/estimators/__init__.py +0 -32
  36. scdesigner/estimators/bernoulli.py +0 -85
  37. scdesigner/estimators/gaussian.py +0 -121
  38. scdesigner/estimators/gaussian_copula_factory.py +0 -367
  39. scdesigner/estimators/glm_factory.py +0 -75
  40. scdesigner/estimators/negbin.py +0 -153
  41. scdesigner/estimators/pnmf.py +0 -160
  42. scdesigner/estimators/poisson.py +0 -124
  43. scdesigner/estimators/zero_inflated_negbin.py +0 -195
  44. scdesigner/estimators/zero_inflated_poisson.py +0 -85
  45. scdesigner/format/__init__.py +0 -4
  46. scdesigner/format/format.py +0 -20
  47. scdesigner/format/print.py +0 -30
  48. scdesigner/minimal/__init__.py +0 -17
  49. scdesigner/minimal/composite.py +0 -119
  50. scdesigner/minimal/copula.py +0 -205
  51. scdesigner/minimal/formula.py +0 -23
  52. scdesigner/minimal/gaussian.py +0 -65
  53. scdesigner/minimal/loader.py +0 -211
  54. scdesigner/minimal/marginal.py +0 -154
  55. scdesigner/minimal/negbin.py +0 -73
  56. scdesigner/minimal/positive_nonnegative_matrix_factorization.py +0 -231
  57. scdesigner/minimal/scd3.py +0 -96
  58. scdesigner/minimal/scd3_instances.py +0 -50
  59. scdesigner/minimal/simulator.py +0 -25
  60. scdesigner/minimal/standard_copula.py +0 -383
  61. scdesigner/predictors/__init__.py +0 -15
  62. scdesigner/predictors/bernoulli.py +0 -9
  63. scdesigner/predictors/gaussian.py +0 -16
  64. scdesigner/predictors/negbin.py +0 -17
  65. scdesigner/predictors/poisson.py +0 -12
  66. scdesigner/predictors/zero_inflated_negbin.py +0 -18
  67. scdesigner/predictors/zero_inflated_poisson.py +0 -18
  68. scdesigner/samplers/__init__.py +0 -23
  69. scdesigner/samplers/bernoulli.py +0 -27
  70. scdesigner/samplers/gaussian.py +0 -25
  71. scdesigner/samplers/glm_factory.py +0 -103
  72. scdesigner/samplers/negbin.py +0 -25
  73. scdesigner/samplers/poisson.py +0 -25
  74. scdesigner/samplers/zero_inflated_negbin.py +0 -40
  75. scdesigner/samplers/zero_inflated_poisson.py +0 -16
  76. scdesigner/simulators/composite_regressor.py +0 -72
  77. scdesigner/simulators/glm_simulator.py +0 -167
  78. scdesigner/simulators/pnmf_regression.py +0 -61
  79. scdesigner/transform/amplify.py +0 -14
  80. scdesigner/transform/mask.py +0 -33
  81. scdesigner/transform/nullify.py +0 -25
  82. scdesigner/transform/split.py +0 -23
  83. scdesigner/transform/substitute.py +0 -14
  84. scdesigner-0.0.5.dist-info/RECORD +0 -66
scdesigner/minimal/marginal.py
@@ -1,154 +0,0 @@
- from .kwargs import DEFAULT_ALLOWED_KWARGS, _filter_kwargs
- from .loader import adata_loader, get_device
- from anndata import AnnData
- from typing import Union, Dict, Optional, Tuple
- import pandas as pd
- import torch
- import torch.nn as nn
- from abc import ABC, abstractmethod
-
-
- class Marginal(ABC):
-     def __init__(self, formula: Union[Dict, str]):
-         self.formula = formula
-         self.feature_dims = None
-         self.loader = None
-         self.n_outcomes = None
-         self.predict = None
-         self.predictor_names = None
-         self.parameters = None
-         self.device = get_device()
-
-     def setup_data(self, adata: AnnData, batch_size: int = 1024, **kwargs):
-         """Set up the dataloader for the AnnData object."""
-         # keep a reference to the AnnData for later use (e.g., var_names)
-         self.adata = adata
-         self.loader = adata_loader(adata, self.formula, batch_size=batch_size, **kwargs)
-         X_batch, obs_batch = next(iter(self.loader))
-         self.n_outcomes = X_batch.shape[1]
-         self.feature_dims = {k: v.shape[1] for k, v in obs_batch.items()}
-         self.predictor_names = self.loader.dataset.predictor_names
-
-     def fit(self, max_epochs: int = 100, **kwargs):
-         """Fit the marginal predictor using a vanilla PyTorch training loop."""
-         if self.predict is None:
-             self.setup_optimizer(**kwargs)
-
-         for epoch in range(max_epochs):
-             epoch_loss, n_batches = 0.0, 0
-
-             for batch in self.loader:
-                 y, x = batch
-                 if y.device != self.device:
-                     y = y.to(self.device)
-                     x = {k: v.to(self.device) for k, v in x.items()}
-
-                 self.predict.optimizer.zero_grad()
-                 loss = self.predict.loss_fn((y, x))
-                 loss.backward()
-                 self.predict.optimizer.step()
-
-                 epoch_loss += loss.item()
-                 n_batches += 1
-
-             avg_loss = epoch_loss / n_batches
-             print(f"Epoch {epoch}/{max_epochs}, Loss: {avg_loss:.4f}", end='\r')
-         self.parameters = self.format_parameters()
-
-     def format_parameters(self):
-         """Convert fitted coefficient tensors into pandas DataFrames.
-
-         Returns:
-             dict: mapping from parameter name -> pandas.DataFrame with rows
-                 corresponding to predictor column names (from
-                 `self.predictor_names[param]`) and columns corresponding to
-                 `self.adata.var_names` (gene names). The values are moved to
-                 CPU and converted to numpy floats.
-         """
-         var_names = list(self.adata.var_names)
-
-         dfs = {}
-         for param, tensor in self.predict.coefs.items():
-             coef_np = tensor.detach().cpu().numpy()
-             row_names = list(self.predictor_names[param])
-             dfs[param] = pd.DataFrame(coef_np, index=row_names, columns=var_names)
-         return dfs
-
-     def num_params(self):
-         """Return the number of trainable parameters."""
-         if self.predict is None:
-             return 0
-         return sum(p.numel() for p in self.predict.parameters() if p.requires_grad)
-
-     @abstractmethod
-     def setup_optimizer(self, **kwargs):
-         raise NotImplementedError
-
-     @abstractmethod
-     def likelihood(self, batch: Tuple[torch.Tensor, Dict[str, torch.Tensor]]):
-         """Compute the (negative) log-likelihood or loss for a batch."""
-         raise NotImplementedError
-
-     @abstractmethod
-     def invert(self, u: torch.Tensor, x: Dict[str, torch.Tensor]):
-         """Invert pseudo-observations."""
-         raise NotImplementedError
-
-     @abstractmethod
-     def uniformize(self, y: torch.Tensor, x: Dict[str, torch.Tensor]):
-         """Uniformize using the learned CDF."""
-         raise NotImplementedError
-
-
- class GLMPredictor(nn.Module):
-     """GLM-style predictor with arbitrary named parameters.
-
-     Args:
-         n_outcomes: number of model outputs (e.g. genes)
-         feature_dims: mapping from param name -> number of covariate features
-         link_fns: optional mapping from param name -> callable (link) applied to the linear predictor
-
-     The module creates one coefficient matrix per named parameter with shape
-     (n_features_for_param, n_outcomes) and exposes them as Parameters under
-     `self.coefs[param_name]`.
-     """
-     def __init__(
-         self,
-         n_outcomes: int,
-         feature_dims: Dict[str, int],
-         link_fns: Dict[str, callable] = None,
-         loss_fn: Optional[callable] = None,
-         optimizer_class: Optional[callable] = torch.optim.Adam,
-         optimizer_kwargs: Optional[Dict] = None,
-     ):
-         super().__init__()
-         self.n_outcomes = int(n_outcomes)
-         self.feature_dims = dict(feature_dims)
-         self.param_names = list(self.feature_dims.keys())
-
-         self.link_fns = link_fns or {k: torch.exp for k in self.param_names}
-         self.coefs = nn.ParameterDict()
-         for key, dim in self.feature_dims.items():
-             self.coefs[key] = nn.Parameter(torch.zeros(dim, self.n_outcomes))
-         self.reset_parameters()
-
-         self.loss_fn = loss_fn
-         self.to(get_device())
-
-         optimizer_kwargs = optimizer_kwargs or {}
-         filtered_kwargs = _filter_kwargs(optimizer_kwargs, DEFAULT_ALLOWED_KWARGS['optimizer'])
-         self.optimizer = optimizer_class(self.parameters(), **filtered_kwargs)
-
-     def reset_parameters(self):
-         for p in self.coefs.values():
-             nn.init.normal_(p, mean=0.0, std=1e-4)
-
-     def forward(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
-         out = {}
-         for name in self.param_names:
-             x_beta = obs_dict[name] @ self.coefs[name]
-             link = self.link_fns.get(name, torch.exp)
-             out[name] = link(x_beta)
-         return out
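The removed `GLMPredictor` is the workhorse behind every marginal in the old `minimal` module: one coefficient matrix per named parameter, with a link function (default `torch.exp`) applied to each linear predictor. A minimal, self-contained sketch of that forward pass; the shapes, names, and design matrices below are illustrative, not part of the package API:

```python
# Sketch of the GLM-style forward pass implemented by the removed GLMPredictor:
# one coefficient matrix per named parameter, a link applied to X @ beta.
import torch

n_cells, n_genes = 8, 5
feature_dims = {"mean": 3, "dispersion": 1}            # columns per design matrix
coefs = {k: torch.zeros(d, n_genes) for k, d in feature_dims.items()}
link_fns = {k: torch.exp for k in coefs}               # default link in the source

obs_dict = {
    "mean": torch.randn(n_cells, 3),                   # e.g. intercept + covariates
    "dispersion": torch.ones(n_cells, 1),              # intercept-only design
}
# near-zero coefficients (as in reset_parameters) give parameters near exp(0) = 1
params = {k: link_fns[k](obs_dict[k] @ coefs[k]) for k in coefs}
print({k: tuple(v.shape) for k, v in params.items()})  # each: (n_cells, n_genes)
```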
scdesigner/minimal/negbin.py
@@ -1,73 +0,0 @@
- from .formula import standardize_formula
- from .marginal import GLMPredictor, Marginal
- from .loader import _to_numpy
- from typing import Union, Dict, Optional
- import torch
- import numpy as np
- from scipy.stats import nbinom
-
- class NegBin(Marginal):
-     """Negative-binomial marginal estimator"""
-     def __init__(self, formula: Union[Dict, str]):
-         formula = standardize_formula(formula, allowed_keys=['mean', 'dispersion'])
-         super().__init__(formula)
-
-     def setup_optimizer(
-         self,
-         optimizer_class: Optional[callable] = torch.optim.Adam,
-         **optimizer_kwargs,
-     ):
-         if self.loader is None:
-             raise RuntimeError("self.loader is not set (call setup_data first)")
-
-         nll = lambda batch: -self.likelihood(batch).sum()
-         self.predict = GLMPredictor(
-             n_outcomes=self.n_outcomes,
-             feature_dims=self.feature_dims,
-             loss_fn=nll,
-             optimizer_class=optimizer_class,
-             optimizer_kwargs=optimizer_kwargs
-         )
-
-     def likelihood(self, batch):
-         """Compute the log-likelihood"""
-         y, x = batch
-         params = self.predict(x)
-         mu = params.get('mean')
-         r = params.get('dispersion')
-         return (
-             torch.lgamma(y + r)
-             - torch.lgamma(r)
-             - torch.lgamma(y + 1.0)
-             + r * torch.log(r)
-             + y * torch.log(mu)
-             - (r + y) * torch.log(r + mu)
-         )
-
-     def invert(self, u: torch.Tensor, x: Dict[str, torch.Tensor]):
-         """Invert pseudo-observations."""
-         mu, r, u = self._local_params(x, u)
-         p = r / (r + mu)
-         y = nbinom(n=r, p=p).ppf(u)
-         return torch.from_numpy(y).float()
-
-     def uniformize(self, y: torch.Tensor, x: Dict[str, torch.Tensor], epsilon=1e-6):
-         """Return uniformized pseudo-observations for counts y given covariates x."""
-         # cdf values using scipy's parameterization
-         mu, r, y = self._local_params(x, y)
-         p = r / (r + mu)
-         u1 = nbinom(n=r, p=p).cdf(y)
-         u2 = np.where(y > 0, nbinom(n=r, p=p).cdf(y - 1), 0.0)
-
-         # randomize within discrete mass to get uniform(0,1)
-         v = np.random.uniform(size=y.shape)
-         u = np.clip(v * u1 + (1.0 - v) * u2, epsilon, 1.0 - epsilon)
-         return torch.from_numpy(u).float()
-
-     def _local_params(self, x, y=None):
-         params = self.predict(x)
-         mu = params.get('mean')
-         r = params.get('dispersion')
-         if y is None:
-             return _to_numpy(mu, r)
-         return _to_numpy(mu, r, y)
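The `uniformize` method above is the distributional transform (randomized quantile residuals): for a discrete count y, drawing v ~ Uniform(0, 1) and mixing the CDF at y and y − 1 yields pseudo-observations that are exactly uniform on (0, 1), which is what the copula layer consumes. A standalone sketch with fixed, illustrative parameter values:

```python
# Distributional transform used in NegBin.uniformize:
# u = v * F(y) + (1 - v) * F(y - 1), with v ~ Uniform(0, 1), is Uniform(0, 1).
import numpy as np
from scipy.stats import nbinom

rng = np.random.default_rng(0)
r, mu = 5.0, 3.0                       # dispersion and mean (illustrative values)
p = r / (r + mu)                       # scipy parameterization: n = r, p = r / (r + mu)
y = nbinom(n=r, p=p).rvs(size=10_000, random_state=rng)

u1 = nbinom(n=r, p=p).cdf(y)
u2 = np.where(y > 0, nbinom(n=r, p=p).cdf(y - 1), 0.0)
v = rng.uniform(size=y.shape)
u = np.clip(v * u1 + (1.0 - v) * u2, 1e-6, 1 - 1e-6)
print(u.mean(), u.var())               # ~0.5 and ~1/12 if u is uniform
```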
scdesigner/minimal/positive_nonnegative_matrix_factorization.py
@@ -1,231 +0,0 @@
- from .formula import standardize_formula
- from .loader import _to_numpy
- from .simulator import Simulator
- from anndata import AnnData
- from formulaic import model_matrix
- from scipy.stats import gamma
- from typing import Union, Dict
- import numpy as np
- import pandas as pd
- import torch
-
- ################################################################################
- ## Functions for estimating PNMF regression
- ################################################################################
-
- # computes PNMF weight and score; nbase specifies the number of bases
- def pnmf(log_data, nbase=3, **kwargs):  # data is an np array of log-transformed reads
-     """
-     Computes the PNMF weight and score matrices.
-
-     :log_data: log-transformed np array of read data
-     :nbase: number of bases (clusters)
-     :return: W (weights, gene x base) and S (scores, base x cell) as numpy arrays
-     """
-     U = left_singular(log_data, nbase)
-     W = pnmf_eucdist(log_data, U, **kwargs)
-     W = W / np.linalg.norm(W, ord=2)
-     S = W.T @ log_data
-     return W, S
-
-
- def gamma_regression_array(
-     x: np.array, y: np.array, lr: float = 0.1, epochs: int = 40
- ) -> dict:
-     x = torch.tensor(x, dtype=torch.float32)
-     y = torch.tensor(y, dtype=torch.float32)
-
-     n_features, n_outcomes = x.shape[1], y.shape[1]
-     a = torch.zeros(n_features * n_outcomes, requires_grad=True)
-     loc = torch.zeros(n_features * n_outcomes, requires_grad=True)
-     beta = torch.zeros(n_features * n_outcomes, requires_grad=True)
-     optimizer = torch.optim.Adam([a, loc, beta], lr=lr)
-
-     for i in range(epochs):
-         optimizer.zero_grad()
-         loss = negative_gamma_log_likelihood(a, beta, loc, x, y)
-         loss.backward()
-         optimizer.step()
-
-     a, loc, beta = _to_numpy(a, loc, beta)
-     a = a.reshape(n_features, n_outcomes)
-     loc = loc.reshape(n_features, n_outcomes)
-     beta = beta.reshape(n_features, n_outcomes)
-     return {"a": a, "loc": loc, "beta": beta}
-
-
- def class_generator(score, n_clusters=3):
-     """
-     Generates one-hot encodings for score classes
-     """
-     from sklearn.cluster import KMeans
-     kmeans = KMeans(n_clusters, random_state=0)  # specify the number of clusters
-     kmeans.fit(score.T)
-     labels = kmeans.labels_
-     num_classes = len(np.unique(labels))
-     one_hot = np.eye(num_classes)[labels].astype(int)
-     return labels
-
-
- ###############################################################################
- ## Helpers for deriving PNMF
- ###############################################################################
-
-
- def pnmf_eucdist(X, W_init, maxIter=500, threshold=1e-4, tol=1e-10, verbose=False, **kwargs):
-     # initialization
-     W = W_init  # initial W is the PCA of X
-     XX = X @ X.T
-
-     # iterations
-     for iter in range(maxIter):
-         if verbose and (iter + 1) % 10 == 0:
-             print("%d iterations used." % (iter + 1))
-         W_old = W
-
-         XXW = XX @ W
-         SclFactor = np.dot(W, W.T @ XXW) + np.dot(XXW, W.T @ W)
-
-         # QuotientLB
-         SclFactor = MatFindlb(SclFactor, tol)
-         SclFactor = XXW / SclFactor
-         W = W * SclFactor  # somehow W *= SclFactor doesn't work?
-
-         norm_W = np.linalg.norm(W)
-         W /= norm_W
-         W = MatFind(W, tol)
-
-         diffW = np.linalg.norm(W_old - W) / np.linalg.norm(W_old)
-         if diffW < threshold:
-             break
-
-     return W
-
-
- # left singular vectors of X
- def left_singular(X, k):
-     from scipy.sparse.linalg import svds
-     U, _, _ = svds(X, k=k)
-     return np.abs(U)
-
-
- def MatFindlb(A, lb):
-     B = np.ones(A.shape) * lb
-     Alb = np.where(A < lb, B, A)
-     return Alb
-
-
- def MatFind(A, ZeroThres):
-     B = np.zeros(A.shape)
-     Atrunc = np.where(A < ZeroThres, B, A)
-     return Atrunc
-
-
- ###############################################################################
- ## Helpers for training PNMF regression
- ###############################################################################
-
-
- def shifted_gamma_pdf(x, alpha, beta, loc):
-     if not torch.is_tensor(x):
-         x = torch.tensor(x)
-     mask = x < loc
-     y_clamped = torch.clamp(x - loc, min=1e-12)
-
-     log_pdf = (
-         alpha * torch.log(beta)
-         - torch.lgamma(alpha)
-         + (alpha - 1) * torch.log(y_clamped)
-         - beta * y_clamped
-     )
-     loss = -torch.mean(log_pdf[~mask])
-     n_invalid = mask.sum()
-     if n_invalid > 0:  # force samples to be greater than loc
-         loss = loss + 1e10 * n_invalid.float()
-     return loss
-
-
- def negative_gamma_log_likelihood(log_a, log_beta, loc, X, y):
-     n_features = X.shape[1]
-     n_outcomes = y.shape[1]
-
-     a = torch.exp(log_a.reshape(n_features, n_outcomes))
-     beta = torch.exp(log_beta.reshape(n_features, n_outcomes))
-     loc = loc.reshape(n_features, n_outcomes)
-     return shifted_gamma_pdf(y, X @ a, X @ beta, X @ loc)
-
- def format_gamma_parameters(
-     parameters: dict,
-     W_index: list,
-     coef_index: list,
- ) -> dict:
-     parameters["a"] = pd.DataFrame(parameters["a"], index=coef_index)
-     parameters["loc"] = pd.DataFrame(parameters["loc"], index=coef_index)
-     parameters["beta"] = pd.DataFrame(parameters["beta"], index=coef_index)
-     parameters["W"] = pd.DataFrame(parameters["W"], index=W_index)
-     return parameters
-
-
- ################################################################################
- ## Associated PNMF Objects
- ################################################################################
-
- class PositiveNMF(Simulator):
-     """Positive nonnegative matrix factorization marginal estimator"""
-     def __init__(self, formula: Union[Dict, str], **kwargs):
-         self.formula = standardize_formula(formula, allowed_keys=['mean'])
-         self.parameters = None
-         self.hyperparams = kwargs
-
-     def setup_data(self, adata: AnnData, **kwargs):
-         self.log_data = np.log1p(adata.X).T
-         self.n_outcomes = self.log_data.shape[1]
-         self.template = adata
-         self.x = model_matrix(self.formula["mean"], adata.obs)
-         self.columns = self.x.columns
-         self.x = np.asarray(self.x)
-
-     def fit(self, adata: AnnData, lr: float = 0.1):
-         self.setup_data(adata)
-         W, S = pnmf(self.log_data, **self.hyperparams)
-         parameters = gamma_regression_array(self.x, S.T, lr)
-         parameters["W"] = W
-         self.parameters = format_gamma_parameters(
-             parameters, list(self.template.var_names), list(self.columns)
-         )
-
-     def predict(self, obs=None, **kwargs):
-         """Predict from an obs dataframe"""
-         if obs is None:
-             obs = self.template.obs
-
-         x = model_matrix(self.formula["mean"], obs)
-         a, loc, beta = (
-             x @ np.exp(self.parameters["a"]),
-             x @ self.parameters["loc"],
-             x @ np.exp(self.parameters["beta"]),
-         )
-         return {"a": a, "loc": loc, "beta": beta}
-
-     def sample(self, obs=None):
-         """Generate samples."""
-         if obs is None:
-             obs = self.template.obs
-         W = self.parameters["W"]
-         parameters = self.predict(obs)
-         a, loc, beta = parameters["a"], parameters["loc"], parameters["beta"]
-         sim_score = gamma(a, loc, 1 / beta).rvs()
-         samples = np.exp(W @ sim_score.T).T
-
-         # thresholding samples
-         floor = np.floor(samples)
-         samples = floor + np.where(samples - floor < 0.9, 0, 1) - 1
-         samples = np.where(samples < 0, 0, samples)
-
-         result = AnnData(X=samples, obs=obs)
-         result.var_names = self.template.var_names
-         return result
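`pnmf_eucdist` is a multiplicative-update NMF under Euclidean loss, seeded with the absolute left singular vectors of the data: every factor in the update is nonnegative, so W stays nonnegative, and the lower-bounding (`MatFindlb`) guards against division by zero. A condensed, self-contained sketch of the same iteration on random data; the matrix sizes and iteration count are illustrative:

```python
# Multiplicative update as in pnmf_eucdist:
# W <- W * (XX'W) / (W (W'XX'W) + (XX'W)(W'W)), then renormalize.
import numpy as np

rng = np.random.default_rng(0)
X = np.abs(rng.normal(size=(50, 200)))        # genes x cells, e.g. log1p counts
k = 3
U, _, _ = np.linalg.svd(X, full_matrices=False)
W = np.abs(U[:, :k])                          # nonnegative initialization
XX = X @ X.T

for _ in range(100):
    XXW = XX @ W
    denom = W @ (W.T @ XXW) + XXW @ (W.T @ W)
    W = W * (XXW / np.maximum(denom, 1e-10))  # lower-bound the denominator
    W /= np.linalg.norm(W)                    # keep the scale fixed

S = W.T @ X                                   # scores: bases x cells
print(W.shape, S.shape, bool((W >= 0).all()))
```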
scdesigner/minimal/scd3.py
@@ -1,96 +0,0 @@
- from .copula import Copula
- from .loader import obs_loader, adata_loader
- from .marginal import Marginal
- from .simulator import Simulator
- from anndata import AnnData
- from tqdm import tqdm
- import torch
- import numpy as np
- from abc import ABC, abstractmethod
-
- class SCD3Simulator(Simulator):
-     """Simulation wrapper"""
-
-     def __init__(self, marginal: Marginal, copula: Copula):
-         self.marginal = marginal
-         self.copula = copula
-         self.template = None
-         self.parameters = None
-
-     def fit(self, adata: AnnData, **kwargs):
-         """Fit the simulator"""
-         self.template = adata
-         self.marginal.setup_data(adata, **kwargs)
-         self.marginal.setup_optimizer(**kwargs)
-         self.marginal.fit(**kwargs)
-
-         # copula simulator
-         self.copula.setup_data(adata, self.marginal.formula, **kwargs)
-         self.copula.fit(self.marginal.uniformize, **kwargs)
-         self.parameters = {
-             "marginal": self.marginal.parameters,
-             "copula": self.copula.parameters
-         }
-
-     def predict(self, obs=None, batch_size: int = 1000, **kwargs):
-         """Predict from an obs dataframe"""
-         # prepare an internal data loader for this obs
-         if obs is None:
-             obs = self.template.obs
-         loader = obs_loader(obs, self.marginal.formula, batch_size=batch_size, **kwargs)
-
-         # get predictions across batches
-         local_parameters = []
-         for _, x_dict in loader:
-             l = self.marginal.predict(x_dict)
-             local_parameters.append(l)
-
-         # convert to a merged dictionary
-         keys = list(local_parameters[0].keys())
-         return {
-             k: torch.cat([d[k] for d in local_parameters]).detach().cpu().numpy()
-             for k in keys
-         }
-
-     def sample(self, obs=None, batch_size: int = 1000, **kwargs):
-         """Generate samples."""
-         if obs is None:
-             obs = self.template.obs
-         loader = obs_loader(
-             obs,
-             self.copula.formula | self.marginal.formula,
-             batch_size=batch_size,
-             **kwargs
-         )
-
-         # get samples across batches
-         samples = []
-         for _, x_dict in loader:
-             u = self.copula.pseudo_obs(x_dict)
-             u = torch.from_numpy(u)
-             samples.append(self.marginal.invert(u, x_dict))
-         samples = torch.cat(samples).detach().cpu().numpy()
-         return AnnData(X=samples, obs=obs)
-
-     def complexity(self, adata: AnnData = None, **kwargs):
-         if adata is None:
-             adata = self.template
-
-         N, ll = 0, 0
-         loader = adata_loader(adata, self.marginal.formula | self.copula.formula, **kwargs)
-         for batch in tqdm(loader, desc="Computing log-likelihood..."):
-             ll += self.copula.likelihood(self.marginal.uniformize, batch).sum()
-             N += len(batch[0])
-
-         return {
-             "aic": -2 * ll + 2 * self.copula.num_params(),
-             "bic": -2 * ll + np.log(N) * self.copula.num_params()
-         }
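`complexity` scores the fitted model with the usual information criteria, AIC = −2·ll + 2k and BIC = −2·ll + k·log N, where k counts only the copula's parameters. A self-contained sketch of that bookkeeping with placeholder numbers:

```python
# Information criteria as computed by SCD3Simulator.complexity
# (ll, N, and num_params are placeholders for the accumulated values).
import numpy as np

def information_criteria(ll: float, N: int, num_params: int) -> dict:
    return {
        "aic": -2.0 * ll + 2.0 * num_params,
        "bic": -2.0 * ll + np.log(N) * num_params,
    }

print(information_criteria(ll=-1234.5, N=1000, num_params=45))
```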
scdesigner/minimal/scd3_instances.py
@@ -1,50 +0,0 @@
- from .scd3 import SCD3Simulator
- from .negbin import NegBin
- from .zero_inflated_negbin import ZeroInflatedNegBin
- from .gaussian import Gaussian
- from .standard_copula import StandardCopula
- from typing import Optional
-
-
- class NegBinCopula(SCD3Simulator):
-     def __init__(self,
-                  mean_formula: Optional[str] = None,
-                  dispersion_formula: Optional[str] = None,
-                  copula_formula: Optional[str] = None) -> None:
-         marginal = NegBin({"mean": mean_formula, "dispersion": dispersion_formula})
-         covariance = StandardCopula(copula_formula)
-         super().__init__(marginal, covariance)
-
-
- class ZeroInflatedNegBinCopula(SCD3Simulator):
-     def __init__(self,
-                  mean_formula: Optional[str] = None,
-                  dispersion_formula: Optional[str] = None,
-                  zero_inflation_formula: Optional[str] = None,
-                  copula_formula: Optional[str] = None) -> None:
-         marginal = ZeroInflatedNegBin({
-             "mean": mean_formula,
-             "dispersion": dispersion_formula,
-             "zero_inflation_formula": zero_inflation_formula
-         })
-         covariance = StandardCopula(copula_formula)
-         super().__init__(marginal, covariance)
-
-
- class BernoulliCopula(SCD3Simulator):
-     def __init__(self,
-                  mean_formula: Optional[str] = None,
-                  copula_formula: Optional[str] = None) -> None:
-         marginal = NegBin({"mean": mean_formula})
-         covariance = StandardCopula(copula_formula)
-         super().__init__(marginal, covariance)
-
-
- class GaussianCopula(SCD3Simulator):
-     def __init__(self,
-                  mean_formula: Optional[str] = None,
-                  sdev_formula: Optional[str] = None,
-                  copula_formula: Optional[str] = None) -> None:
-         marginal = Gaussian({"mean": mean_formula, "sdev": sdev_formula})
-         covariance = StandardCopula(copula_formula)
-         super().__init__(marginal, covariance)
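Each of these wrappers is pure composition: pick a marginal family, pair it with a `StandardCopula`, and hand both to `SCD3Simulator`. A hypothetical usage sketch against the removed 0.0.5 API; the toy `AnnData`, the `cell_type` column, and the formulas are placeholders, and the module path is assumed from the file list above:

```python
import numpy as np
import pandas as pd
from anndata import AnnData
from scdesigner.minimal.scd3_instances import NegBinCopula  # 0.0.5 path (assumed)

# toy template: 100 cells x 10 genes with a two-level cell_type covariate
rng = np.random.default_rng(0)
adata = AnnData(
    X=rng.poisson(4.0, size=(100, 10)).astype(float),
    obs=pd.DataFrame({"cell_type": rng.choice(["A", "B"], size=100)}),
)

sim = NegBinCopula(
    mean_formula="~ cell_type",    # NB mean varies with cell type
    dispersion_formula="~ 1",      # intercept-only dispersion
    copula_formula="~ cell_type",  # copula grouping formula (illustrative)
)
sim.fit(adata)
synthetic = sim.sample()           # AnnData sharing the template's obs
```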
scdesigner/minimal/simulator.py
@@ -1,25 +0,0 @@
- from anndata import AnnData
- from typing import Dict
- from pandas import DataFrame
- from abc import abstractmethod
-
- class Simulator:
-     """Simulation abstract class"""
-
-     def __init__(self):
-         self.parameters = None
-
-     @abstractmethod
-     def fit(self, anndata: AnnData, **kwargs) -> None:
-         """Fit the simulator"""
-         self.template = anndata
-
-     @abstractmethod
-     def predict(self, obs: DataFrame = None, **kwargs) -> Dict:
-         """Predict from an obs dataframe"""
-         pass
-
-     @abstractmethod
-     def sample(self, obs: DataFrame = None, **kwargs) -> AnnData:
-         """Generate samples."""
-         pass
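For orientation, a minimal concrete subclass satisfying this fit/predict/sample contract; a toy example, assuming the `Simulator` class above is importable at the 0.0.5 module path, with purely illustrative resampling logic:

```python
import numpy as np
from anndata import AnnData
from pandas import DataFrame
from scdesigner.minimal.simulator import Simulator  # 0.0.5 path (assumed)

class ResampleSimulator(Simulator):
    """Toy simulator: resample rows of the template count matrix."""
    def fit(self, anndata: AnnData, **kwargs) -> None:
        self.template = anndata          # store the template, as the ABC suggests

    def predict(self, obs: DataFrame = None, **kwargs) -> dict:
        return {}                        # no model parameters to report

    def sample(self, obs: DataFrame = None, **kwargs) -> AnnData:
        if obs is None:
            obs = self.template.obs
        idx = np.random.choice(self.template.n_obs, size=len(obs))
        return AnnData(X=self.template.X[idx], obs=obs)
```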