gengeneeval 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ """
2
+ Correlation metrics for gene expression evaluation.
3
+
4
+ Provides Pearson and Spearman correlation metrics with per-gene computation.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import numpy as np
9
+ from scipy.stats import pearsonr, spearmanr
10
+ from typing import Optional
11
+
12
+ from .base_metric import CorrelationMetric
13
+
14
+
15
class PearsonCorrelation(CorrelationMetric):
    """
    Pearson correlation coefficient between real and generated gene expression.

    When both inputs contain the same number of samples, the samples are
    treated as paired and the coefficient is computed per gene across samples.
    Otherwise a single coefficient between the two mean expression profiles
    is computed across genes and replicated for every gene.
    Higher values (closer to 1) indicate better agreement.
    """

    def __init__(self):
        super().__init__(
            name="pearson",
            description="Pearson correlation coefficient (per gene across samples)"
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute Pearson correlation for each gene.

        Two modes are used, selected by the number of samples (rows):

        - Equal sample counts: row ``k`` of ``real`` is paired with row ``k``
          of ``generated`` and each gene (column) gets its own coefficient.
          Genes that are constant in either input get ``NaN``.
        - Different sample counts: both inputs are averaged over samples and
          one coefficient is computed between the two mean profiles (across
          genes). That single value is returned for every gene; if either
          mean profile is constant, every entry is ``NaN``.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Pearson correlation per gene, float array of length n_genes

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` do not have the same number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # Without this check the paired branch would silently ignore surplus
        # genes in `generated`, or fail with an IndexError when it has fewer.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        # Nothing to correlate; also keeps pearsonr from rejecting empty
        # mean profiles in the unequal-sample branch.
        if n_genes == 0:
            return np.zeros(0)

        if real.shape[0] == generated.shape[0]:
            # Paired samples: one coefficient per gene; NaN marks genes that
            # are constant in either input (correlation undefined).
            correlations = np.full(n_genes, np.nan)
            for i, (r_vals, g_vals) in enumerate(zip(real.T, generated.T)):
                if np.std(r_vals) == 0 or np.std(g_vals) == 0:
                    continue
                correlations[i] = pearsonr(r_vals, g_vals)[0]
            return correlations

        # Sample counts differ, so samples cannot be paired: fall back to one
        # correlation between the mean expression profiles.
        real_mean = real.mean(axis=0)
        gen_mean = generated.mean(axis=0)

        if np.std(real_mean) == 0 or np.std(gen_mean) == 0:
            return np.full(n_genes, np.nan)

        overall_corr = pearsonr(real_mean, gen_mean)[0]
        # Same value for all genes (overall correlation); dtype pinned so the
        # result is float64 regardless of the input precision.
        return np.full(n_genes, overall_corr, dtype=float)
88
+
89
+
90
class SpearmanCorrelation(CorrelationMetric):
    """
    Spearman rank correlation between real and generated gene expression.

    More robust to outliers than Pearson. Measures monotonic relationship.

    When both inputs contain the same number of samples, the coefficient is
    computed per gene across paired samples. Otherwise a single coefficient
    between the two mean expression profiles is replicated for every gene.
    """

    def __init__(self):
        super().__init__(
            name="spearman",
            description="Spearman rank correlation coefficient"
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute Spearman correlation for each gene.

        Two modes are used, selected by the number of samples (rows):

        - Equal sample counts: row ``k`` of ``real`` is paired with row ``k``
          of ``generated`` and each gene (column) gets its own coefficient.
          Genes that are constant in either input get ``NaN``.
        - Different sample counts: both inputs are averaged over samples and
          one coefficient is computed between the two mean profiles (across
          genes). That single value is returned for every gene; if either
          mean profile is constant, every entry is ``NaN``.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Spearman correlation per gene, float array of length n_genes

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` do not have the same number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # Without this check the paired branch would silently ignore surplus
        # genes in `generated`, or fail with an IndexError when it has fewer.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        # Nothing to correlate; also keeps spearmanr from being handed empty
        # mean profiles in the unequal-sample branch.
        if n_genes == 0:
            return np.zeros(0)

        if real.shape[0] == generated.shape[0]:
            # Paired samples: one coefficient per gene; NaN marks genes that
            # are constant in either input (correlation undefined).
            correlations = np.full(n_genes, np.nan)
            for i, (r_vals, g_vals) in enumerate(zip(real.T, generated.T)):
                if np.std(r_vals) == 0 or np.std(g_vals) == 0:
                    continue
                correlations[i] = spearmanr(r_vals, g_vals)[0]
            return correlations

        # Sample counts differ, so samples cannot be paired: fall back to one
        # correlation between the mean expression profiles.
        real_mean = real.mean(axis=0)
        gen_mean = generated.mean(axis=0)

        if np.std(real_mean) == 0 or np.std(gen_mean) == 0:
            return np.full(n_genes, np.nan)

        overall_corr = spearmanr(real_mean, gen_mean)[0]
        # Same value for all genes; dtype pinned to float64 to match the
        # NaN-filled results returned elsewhere in this method.
        return np.full(n_genes, overall_corr, dtype=float)
152
+
153
+
154
class MeanPearsonCorrelation(CorrelationMetric):
    """
    Pearson correlation between the mean expression profiles of two datasets.

    Each dataset is averaged over its samples, giving one value per gene;
    the two profiles are then correlated across genes. The resulting single
    coefficient is replicated so the output has one entry per gene.
    """

    def __init__(self):
        metric_name = "mean_pearson"
        metric_description = "Pearson correlation on mean expression profiles"
        super().__init__(name=metric_name, description=metric_description)

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Correlate the sample-averaged profiles of ``real`` and ``generated``.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Array of length n_genes holding the same coefficient in every
            position, or NaN everywhere when either mean profile has zero
            standard deviation.
        """
        real_2d = np.atleast_2d(real)
        generated_2d = np.atleast_2d(generated)
        gene_count = real_2d.shape[1]

        real_profile = real_2d.mean(axis=0)
        generated_profile = generated_2d.mean(axis=0)

        # Correlation is undefined for a constant profile; keep NaN then.
        value = np.nan
        if np.std(real_profile) != 0 and np.std(generated_profile) != 0:
            value = pearsonr(real_profile, generated_profile)[0]

        return np.full(gene_count, value)
200
+
201
+
202
class MeanSpearmanCorrelation(CorrelationMetric):
    """
    Spearman rank correlation between the mean expression profiles.

    Each dataset is averaged over its samples, giving one value per gene;
    the two profiles are then rank-correlated across genes. The resulting
    single coefficient is replicated so the output has one entry per gene.
    """

    def __init__(self):
        metric_name = "mean_spearman"
        metric_description = "Spearman correlation on mean expression profiles"
        super().__init__(name=metric_name, description=metric_description)

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Rank-correlate the sample-averaged profiles of ``real`` and ``generated``.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Array of length n_genes holding the same coefficient in every
            position, or NaN everywhere when either mean profile has zero
            standard deviation.
        """
        real_2d = np.atleast_2d(real)
        generated_2d = np.atleast_2d(generated)
        gene_count = real_2d.shape[1]

        real_profile = real_2d.mean(axis=0)
        generated_profile = generated_2d.mean(axis=0)

        # Correlation is undefined for a constant profile; keep NaN then.
        value = np.nan
        if np.std(real_profile) != 0 and np.std(generated_profile) != 0:
            value = spearmanr(real_profile, generated_profile)[0]

        return np.full(gene_count, value)