scdesigner 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,124 +0,0 @@
1
- from .copula import Copula
2
- from .formula import standardize_formula
3
- from .kwargs import DEFAULT_ALLOWED_KWARGS, _filter_kwargs
4
- from anndata import AnnData
5
- from scipy.stats import norm, multivariate_normal
6
- from tqdm import tqdm
7
- from typing import Dict, Union, Callable, Tuple
8
- import numpy as np
9
- import pandas as pd
10
- import torch
11
-
12
-
13
class StandardCovariance(Copula):
    """Gaussian-copula covariance, estimated separately for each group.

    The copula formula may only contain a ``group`` term.  After ``fit`` the
    ``parameters`` attribute holds either a single covariance ``DataFrame``
    (exactly one group) or a ``{group name: DataFrame}`` dictionary.
    """

    def __init__(self, formula: str = "~ 1"):
        """Standardize ``formula`` (only the ``group`` key is allowed) and
        hand it to the parent ``Copula`` constructor."""
        formula = standardize_formula(formula, allowed_keys=['group'])
        super().__init__(formula)
        self.groups = None

    def setup_data(self, adata: AnnData, marginal_formula: Dict[str, str], **kwargs):
        """Build the data loader and record the group bookkeeping.

        Args:
            adata: Dataset forwarded to the parent ``setup_data``.
            marginal_formula: Marginal formulas forwarded to the parent.
            **kwargs: Only the keys listed in
                ``DEFAULT_ALLOWED_KWARGS['data']`` are forwarded.

        Raises:
            ValueError: If the first batch's ``group`` design matrix holds
                any value other than 0 or 1.
        """
        data_kwargs = _filter_kwargs(kwargs, DEFAULT_ALLOWED_KWARGS['data'])
        super().setup_data(adata, marginal_formula, **data_kwargs)
        # Only the first batch is inspected for the validity check below.
        _, obs_batch = next(iter(self.loader))
        obs_batch_group = obs_batch.get("group")

        # fill in group indexing variables
        self.groups = self.loader.dataset.predictor_names["group"]
        self.n_groups = len(self.groups)
        self.group_col = {g: i for i, g in enumerate(self.groups)}

        # check that obs_batch is a binary grouping matrix
        unique_vals = torch.unique(obs_batch_group)
        if not torch.all((unique_vals == 0) | (unique_vals == 1)).item():
            raise ValueError("Only categorical groups are currently supported in copula covariance estimation.")

    def fit(self, uniformizer: Callable, **kwargs):
        """Estimate one covariance matrix per group from streamed batches.

        Args:
            uniformizer: Callable invoked as ``uniformizer(y, x_dict)`` on
                every batch; its output is passed through the standard
                normal quantile function.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        sums = {g: np.zeros(self.n_outcomes) for g in self.groups}
        # NOTE(review): the accumulator starts from the identity rather than
        # zeros, so each estimate carries an extra I / N_g term. Presumably
        # intentional regularization -- confirm with the authors.
        second_moments = {g: np.eye(self.n_outcomes) for g in self.groups}
        Ng = {g: 0 for g in self.groups}

        for y, x_dict in tqdm(self.loader, desc="Estimating copula covariance"):
            memberships = x_dict.get("group").numpy()
            u = uniformizer(y, x_dict)

            for g in self.groups:
                ix = np.where(memberships[:, self.group_col[g]] == 1)
                z = norm().ppf(u[ix])
                second_moments[g] += z.T @ z
                sums[g] += z.sum(axis=0)
                Ng[g] += len(ix[0])

        covariances = {}
        for g in self.groups:
            mean = sums[g] / Ng[g]
            covariances[g] = second_moments[g] / Ng[g] - np.outer(mean, mean)

        # With a single group the dictionary is collapsed to a bare matrix.
        if len(self.groups) == 1:
            covariances = list(covariances.values())[0]
        self.parameters = self.format_parameters(covariances)

    def format_parameters(self, covariances: Union[Dict, np.ndarray]):
        """Label covariance matrices with ``self.adata.var_names``.

        Args:
            covariances: One matrix (array, list or tuple) or a dictionary
                of matrices keyed by group.

        Returns:
            A ``DataFrame`` for a single matrix, or a dictionary of
            ``DataFrame`` objects with the same keys.  Any other input type
            falls through and yields ``None``.
        """
        var_names = self.adata.var_names

        def to_df(mat):
            return pd.DataFrame(mat, index=var_names, columns=var_names)

        if isinstance(covariances, dict):
            return {k: to_df(v) for k, v in covariances.items()}

        if isinstance(covariances, (np.ndarray, list, tuple)):
            return to_df(covariances)

    def _parameters_by_group(self) -> Dict:
        """Return ``self.parameters`` as a ``{group: covariance}`` mapping.

        ``fit`` stores a bare ``DataFrame`` when there is exactly one group;
        in that case it is re-keyed under that single group name.
        """
        parameters = self.parameters
        if isinstance(parameters, dict):
            return parameters
        return {next(iter(self.groups)): parameters}

    def _group_indices(self, memberships) -> Dict:
        """Map each group to the row indices whose membership entry is 1."""
        return {
            g: np.where(memberships[:, self.group_col[g]] == 1)[0]
            for g in self.groups
        }

    def pseudo_obs(self, x_dict: Dict):
        """Sample pseudo-observations from the fitted copula.

        Args:
            x_dict: Dictionary whose ``"group"`` entry is a tensor of 0/1
                memberships with one column per group.

        Returns:
            Array of shape ``(number of rows, self.n_outcomes)``; rows that
            belong to no parameterized group stay at zero.
        """
        # convert one-hot encoding memberships to a map
        # {"group1": [indices of group 1], "group2": [indices of group 2]}
        memberships = x_dict.get("group").numpy()
        group_ix = self._group_indices(memberships)

        # initialize the result
        u = np.zeros((len(memberships), self.n_outcomes))

        # loop over groups and sample each part in turn
        for group, sigma in self._parameters_by_group().items():
            z = np.random.multivariate_normal(
                mean=np.zeros(self.n_outcomes),
                cov=sigma,
                size=len(group_ix[group]),
            )
            # Each coordinate is mapped through the CDF of a normal whose
            # scale is that coordinate's standard deviation in ``sigma``.
            normal_distn = norm(0, np.diag(sigma) ** 0.5)
            u[group_ix[group]] = normal_distn.cdf(z)
        return u

    def likelihood(self, uniformizer: Callable, batch: Tuple[torch.Tensor, Dict[str, torch.Tensor]]):
        """Per-row copula log-likelihood for one batch.

        Args:
            uniformizer: Callable invoked as ``uniformizer(y, x_dict)``.
            batch: ``(y, x_dict)`` pair; ``x_dict["group"]`` holds the 0/1
                group memberships.

        Returns:
            Array with one value per row: the multivariate normal log-density
            under the row's group covariance minus the sum of standard normal
            log-densities.  Rows in no parameterized group stay at zero.
        """
        # uniformize the observations
        y, x_dict = batch
        u = uniformizer(y, x_dict)
        z = norm().ppf(u)

        # same group manipulation as for pseudo_obs
        memberships = x_dict.get("group").numpy()
        group_ix = self._group_indices(memberships)

        ll = np.zeros(len(z))
        for group, sigma in self._parameters_by_group().items():
            ix = group_ix[group]
            if len(ix) > 0:
                copula_ll = multivariate_normal.logpdf(z[ix], np.zeros(sigma.shape[0]), sigma)
                ll[ix] = copula_ll - norm.logpdf(z[ix]).sum(axis=1)
        return ll

    def num_params(self, **kwargs):
        """Count the nonzero off-diagonal covariance entries, once per pair.

        For each group this is (number of nonzero entries minus the matrix
        dimension) divided by two, summed over groups.  The formula takes
        every diagonal entry to be nonzero.
        """
        S = self._parameters_by_group()
        per_group = [(np.sum(S[g].values != 0) - S[g].shape[0]) / 2 for g in self.groups]
        return sum(per_group)