gengeneeval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geneval/__init__.py +129 -0
- geneval/cli.py +333 -0
- geneval/config.py +141 -0
- geneval/core.py +41 -0
- geneval/data/__init__.py +23 -0
- geneval/data/gene_expression_datamodule.py +211 -0
- geneval/data/loader.py +437 -0
- geneval/evaluator.py +359 -0
- geneval/evaluators/__init__.py +4 -0
- geneval/evaluators/base_evaluator.py +178 -0
- geneval/evaluators/gene_expression_evaluator.py +218 -0
- geneval/metrics/__init__.py +65 -0
- geneval/metrics/base_metric.py +229 -0
- geneval/metrics/correlation.py +232 -0
- geneval/metrics/distances.py +516 -0
- geneval/metrics/metrics.py +134 -0
- geneval/models/__init__.py +1 -0
- geneval/models/base_model.py +53 -0
- geneval/results.py +334 -0
- geneval/testing.py +393 -0
- geneval/utils/__init__.py +1 -0
- geneval/utils/io.py +27 -0
- geneval/utils/preprocessing.py +82 -0
- geneval/visualization/__init__.py +38 -0
- geneval/visualization/plots.py +499 -0
- geneval/visualization/visualizer.py +1096 -0
- gengeneeval-0.1.0.dist-info/METADATA +172 -0
- gengeneeval-0.1.0.dist-info/RECORD +31 -0
- gengeneeval-0.1.0.dist-info/WHEEL +4 -0
- gengeneeval-0.1.0.dist-info/entry_points.txt +3 -0
- gengeneeval-0.1.0.dist-info/licenses/LICENSE +9 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Correlation metrics for gene expression evaluation.
|
|
3
|
+
|
|
4
|
+
Provides Pearson and Spearman correlation metrics with per-gene computation.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from scipy.stats import pearsonr, spearmanr
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .base_metric import CorrelationMetric
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PearsonCorrelation(CorrelationMetric):
    """
    Pearson correlation coefficient between real and generated gene expression.

    Two modes are used, selected by the input shapes:

    - If both inputs have the same number of samples, rows are treated as
      paired and one correlation is computed per gene across samples.
    - Otherwise a single correlation between the two mean expression
      profiles is computed and replicated for every gene.

    Higher values (closer to 1) indicate better agreement.
    """

    def __init__(self):
        super().__init__(
            name="pearson",
            description="Pearson correlation coefficient (per gene across samples)",
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute Pearson correlation for each gene.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Array of length n_genes. In paired mode each entry is the
            correlation of that gene across samples, or NaN when the gene is
            constant in either input. In mean-profile mode every entry holds
            the same overall correlation, or NaN when either mean profile is
            constant.

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` have a different number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # Columns are compared position by position, so a differing gene
        # count would silently misalign (or truncate) the comparison.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        if real.shape[0] != generated.shape[0]:
            # Samples cannot be paired: correlate the mean profiles instead.
            real_mean = real.mean(axis=0)
            gen_mean = generated.mean(axis=0)

            # Exact equality instead of `np.std(...) == 0`: rounding in the
            # mean can leave a tiny non-zero std for a truly constant vector.
            if np.all(real_mean == real_mean[:1]) or np.all(gen_mean == gen_mean[:1]):
                return np.full(n_genes, np.nan)

            overall_corr, _ = pearsonr(real_mean, gen_mean)
            # Same value for all genes (overall correlation).
            return np.full(n_genes, overall_corr, dtype=float)

        # Paired mode. Detect constant genes once for the whole matrix; the
        # correlation is undefined for them, so they stay NaN. With zero
        # samples every gene counts as constant and therefore yields NaN.
        constant = np.all(real == real[:1], axis=0) | np.all(
            generated == generated[:1], axis=0
        )

        correlations = np.full(n_genes, np.nan)
        for i in np.flatnonzero(~constant):
            corr, _ = pearsonr(real[:, i], generated[:, i])
            correlations[i] = corr

        return correlations
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class SpearmanCorrelation(CorrelationMetric):
    """
    Spearman rank correlation between real and generated gene expression.

    More robust to outliers than Pearson. Measures monotonic relationship.

    If both inputs have the same number of samples, one correlation is
    computed per gene across the paired samples. Otherwise a single
    correlation between the two mean expression profiles is computed and
    replicated for every gene.
    """

    def __init__(self):
        super().__init__(
            name="spearman",
            description="Spearman rank correlation coefficient",
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute Spearman correlation for each gene.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Array of length n_genes. In paired mode each entry is the rank
            correlation of that gene across samples, or NaN when the gene is
            constant in either input. In mean-profile mode every entry holds
            the same overall correlation, or NaN when either mean profile is
            constant.

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` have a different number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # Columns are compared position by position, so a differing gene
        # count would silently misalign (or truncate) the comparison.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        if real.shape[0] != generated.shape[0]:
            # Samples cannot be paired: correlate the mean profiles instead.
            real_mean = real.mean(axis=0)
            gen_mean = generated.mean(axis=0)

            # Exact equality instead of `np.std(...) == 0`: rounding in the
            # mean can leave a tiny non-zero std for a truly constant vector.
            if np.all(real_mean == real_mean[:1]) or np.all(gen_mean == gen_mean[:1]):
                return np.full(n_genes, np.nan)

            overall_corr, _ = spearmanr(real_mean, gen_mean)
            return np.full(n_genes, overall_corr, dtype=float)

        # Paired mode. Detect constant genes once for the whole matrix; the
        # correlation is undefined for them, so they stay NaN. With zero
        # samples every gene counts as constant and therefore yields NaN.
        constant = np.all(real == real[:1], axis=0) | np.all(
            generated == generated[:1], axis=0
        )

        correlations = np.full(n_genes, np.nan)
        for i in np.flatnonzero(~constant):
            corr, _ = spearmanr(real[:, i], generated[:, i])
            correlations[i] = corr

        return correlations
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class MeanPearsonCorrelation(CorrelationMetric):
    """
    Pearson correlation on mean expression profiles.

    Computes mean expression per gene, then correlates the profiles.
    Returns single value replicated across genes.
    """

    def __init__(self):
        super().__init__(
            name="mean_pearson",
            description="Pearson correlation on mean expression profiles",
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute correlation between mean profiles.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Single correlation value replicated per gene, or NaN for every
            gene when either mean profile is constant (this includes inputs
            with fewer than two genes).

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` have a different number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # The two mean profiles are correlated element-wise, so they must
        # describe the same genes. Checking here also prevents a mismatch
        # from being masked by the constant-profile shortcut below.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        real_mean = real.mean(axis=0)
        gen_mean = generated.mean(axis=0)

        # Exact equality instead of `np.std(...) == 0`: rounding in the mean
        # can leave a tiny non-zero std for a truly constant profile. An
        # empty profile also counts as constant, so zero-gene input returns
        # an empty array instead of reaching SciPy.
        if np.all(real_mean == real_mean[:1]) or np.all(gen_mean == gen_mean[:1]):
            return np.full(n_genes, np.nan)

        corr, _ = pearsonr(real_mean, gen_mean)
        return np.full(n_genes, corr)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class MeanSpearmanCorrelation(CorrelationMetric):
    """
    Spearman correlation on mean expression profiles.

    Computes mean expression per gene, then rank-correlates the profiles.
    Returns single value replicated across genes.
    """

    def __init__(self):
        super().__init__(
            name="mean_spearman",
            description="Spearman correlation on mean expression profiles",
        )

    def compute_per_gene(
        self,
        real: np.ndarray,
        generated: np.ndarray,
    ) -> np.ndarray:
        """
        Compute Spearman correlation between mean profiles.

        Parameters
        ----------
        real : np.ndarray
            Real data, shape (n_samples_real, n_genes)
        generated : np.ndarray
            Generated data, shape (n_samples_gen, n_genes)

        Returns
        -------
        np.ndarray
            Single correlation value replicated per gene, or NaN for every
            gene when either mean profile is constant (this includes inputs
            with fewer than two genes).

        Raises
        ------
        ValueError
            If ``real`` and ``generated`` have a different number of genes.
        """
        real = np.atleast_2d(real)
        generated = np.atleast_2d(generated)
        n_genes = real.shape[1]

        # The two mean profiles are correlated element-wise, so they must
        # describe the same genes. Checking here also prevents a mismatch
        # from being masked by the constant-profile shortcut below.
        if generated.shape[1] != n_genes:
            raise ValueError(
                "real and generated must have the same number of genes, "
                f"got {n_genes} and {generated.shape[1]}"
            )

        real_mean = real.mean(axis=0)
        gen_mean = generated.mean(axis=0)

        # Exact equality instead of `np.std(...) == 0`: rounding in the mean
        # can leave a tiny non-zero std for a truly constant profile. An
        # empty profile also counts as constant, so zero-gene input returns
        # an empty array instead of reaching SciPy.
        if np.all(real_mean == real_mean[:1]) or np.all(gen_mean == gen_mean[:1]):
            return np.full(n_genes, np.nan)

        corr, _ = spearmanr(real_mean, gen_mean)
        return np.full(n_genes, corr)
|