gengeneeval 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geneval/__init__.py +56 -1
- geneval/deg/__init__.py +65 -0
- geneval/deg/context.py +271 -0
- geneval/deg/detection.py +578 -0
- geneval/deg/evaluator.py +538 -0
- geneval/deg/visualization.py +376 -0
- geneval/evaluator.py +46 -0
- geneval/metrics/__init__.py +25 -0
- geneval/metrics/accelerated.py +857 -0
- {gengeneeval-0.2.1.dist-info → gengeneeval-0.4.0.dist-info}/METADATA +164 -3
- {gengeneeval-0.2.1.dist-info → gengeneeval-0.4.0.dist-info}/RECORD +14 -8
- {gengeneeval-0.2.1.dist-info → gengeneeval-0.4.0.dist-info}/WHEEL +0 -0
- {gengeneeval-0.2.1.dist-info → gengeneeval-0.4.0.dist-info}/entry_points.txt +0 -0
- {gengeneeval-0.2.1.dist-info → gengeneeval-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gengeneeval
|
|
3
|
-
Version: 0.2.1
|
|
4
|
-
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, and publication-quality visualizations.
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
7
|
-
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient
|
|
7
|
+
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient,DEG,perturbation
|
|
8
8
|
Author: GenEval Team
|
|
9
9
|
Author-email: geneval@example.com
|
|
10
10
|
Requires-Python: >=3.8,<4.0
|
|
@@ -24,6 +24,7 @@ Provides-Extra: full
|
|
|
24
24
|
Provides-Extra: gpu
|
|
25
25
|
Requires-Dist: anndata (>=0.8.0)
|
|
26
26
|
Requires-Dist: geomloss (>=0.2.1) ; extra == "full" or extra == "gpu"
|
|
27
|
+
Requires-Dist: joblib (>=1.0.0)
|
|
27
28
|
Requires-Dist: matplotlib (>=3.5.0)
|
|
28
29
|
Requires-Dist: numpy (>=1.21.0)
|
|
29
30
|
Requires-Dist: pandas (>=1.3.0)
|
|
@@ -77,8 +78,12 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
|
|
|
77
78
|
- ✅ Condition-based matching (perturbation, cell type, etc.)
|
|
78
79
|
- ✅ Train/test split support
|
|
79
80
|
- ✅ Per-gene and aggregate metrics
|
|
81
|
+
- ✅ **DEG-focused evaluation** with per-context (covariate × perturbation) support
|
|
82
|
+
- ✅ **Fast DEG detection** via vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum test
|
|
80
83
|
- ✅ **Memory-efficient lazy loading** for large datasets
|
|
81
84
|
- ✅ **Batched evaluation** to avoid OOM errors
|
|
85
|
+
- ✅ **CPU parallelization** via joblib (multi-core speedup)
|
|
86
|
+
- ✅ **GPU acceleration** via PyTorch (10-100x speedup)
|
|
82
87
|
- ✅ Modular, extensible architecture
|
|
83
88
|
- ✅ Command-line interface
|
|
84
89
|
- ✅ Publication-quality visualizations
|
|
@@ -173,6 +178,162 @@ with load_data_lazy("real.h5ad", "gen.h5ad", ["perturbation"]) as loader:
|
|
|
173
178
|
pass
|
|
174
179
|
```
|
|
175
180
|
|
|
181
|
+
### Accelerated Evaluation (CPU Parallelization & GPU)
|
|
182
|
+
|
|
183
|
+
GenEval supports CPU parallelization and GPU acceleration for significant speedups:
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from geneval import evaluate, get_available_backends
|
|
187
|
+
|
|
188
|
+
# Check available backends
|
|
189
|
+
print(get_available_backends())
|
|
190
|
+
# {'joblib': True, 'torch': True, 'geomloss': True, 'cuda': True, 'mps': False}
|
|
191
|
+
|
|
192
|
+
# Parallel CPU evaluation (use all cores)
|
|
193
|
+
results = evaluate(
|
|
194
|
+
real_path="real.h5ad",
|
|
195
|
+
generated_path="generated.h5ad",
|
|
196
|
+
condition_columns=["perturbation"],
|
|
197
|
+
n_jobs=-1, # Use all available CPU cores
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
# GPU-accelerated evaluation
|
|
201
|
+
results = evaluate(
|
|
202
|
+
real_path="real.h5ad",
|
|
203
|
+
generated_path="generated.h5ad",
|
|
204
|
+
condition_columns=["perturbation"],
|
|
205
|
+
device="cuda", # Use NVIDIA GPU
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
# Combined: parallel CPU + auto device selection
|
|
209
|
+
results = evaluate(..., n_jobs=8, device="auto")
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
#### Low-level Accelerated API
|
|
213
|
+
|
|
214
|
+
For custom workflows, use the accelerated metrics directly:
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
from geneval.metrics.accelerated import (
|
|
218
|
+
compute_metrics_accelerated,
|
|
219
|
+
GPUWasserstein1,
|
|
220
|
+
GPUMMD,
|
|
221
|
+
vectorized_wasserstein1,
|
|
222
|
+
)
|
|
223
|
+
import numpy as np
|
|
224
|
+
|
|
225
|
+
# Load your data
|
|
226
|
+
real = np.random.randn(1000, 5000) # 1000 cells, 5000 genes
|
|
227
|
+
generated = np.random.randn(1000, 5000)
|
|
228
|
+
|
|
229
|
+
# Compute multiple metrics with acceleration
|
|
230
|
+
results = compute_metrics_accelerated(
|
|
231
|
+
real, generated,
|
|
232
|
+
metrics=["wasserstein_1", "wasserstein_2", "mmd", "energy"],
|
|
233
|
+
n_jobs=8, # CPU parallelization
|
|
234
|
+
device="cuda", # GPU acceleration
|
|
235
|
+
verbose=True,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
# Access results
|
|
239
|
+
print(f"W1: {results['wasserstein_1'].aggregate_value:.4f}")
|
|
240
|
+
print(f"MMD: {results['mmd'].aggregate_value:.4f}")
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
#### Performance Tips
|
|
244
|
+
|
|
245
|
+
| Optimization | Speedup | When to Use |
|
|
246
|
+
|--------------|---------|-------------|
|
|
247
|
+
| `n_jobs=-1` (all cores) | 4-16x | Always (if joblib available) |
|
|
248
|
+
| `device="cuda"` | 10-100x | Large datasets, NVIDIA GPU available |
|
|
249
|
+
| `device="mps"` | 5-20x | Apple Silicon Macs |
|
|
250
|
+
| Vectorized NumPy | 2-5x | Automatic fallback |
|
|
251
|
+
|
|
252
|
+
### DEG-Focused Evaluation
|
|
253
|
+
|
|
254
|
+
GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, computing metrics only on biologically relevant DEGs rather than all genes. This provides more meaningful evaluation for perturbation prediction tasks.
|
|
255
|
+
|
|
256
|
+
#### Key Features
|
|
257
|
+
|
|
258
|
+
- **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
|
|
259
|
+
- **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
|
|
260
|
+
- **GPU acceleration**: DEG detection and metrics on GPU for large datasets
|
|
261
|
+
- **Comprehensive reporting**: Aggregated and expanded results with visualizations
|
|
262
|
+
|
|
263
|
+
#### Quick Start
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from geneval import evaluate_degs
|
|
267
|
+
import pandas as pd
|
|
268
|
+
|
|
269
|
+
# Evaluate with DEG-focused metrics
|
|
270
|
+
results = evaluate_degs(
|
|
271
|
+
real_data=real_adata.X, # (n_samples, n_genes)
|
|
272
|
+
generated_data=gen_adata.X,
|
|
273
|
+
real_obs=real_adata.obs,
|
|
274
|
+
generated_obs=gen_adata.obs,
|
|
275
|
+
condition_columns=["cell_type", "perturbation"], # Context columns
|
|
276
|
+
control_key="control", # Value indicating control samples
|
|
277
|
+
perturbation_column="perturbation",
|
|
278
|
+
deg_method="welch", # or "student", "wilcoxon", "logfc"
|
|
279
|
+
pval_threshold=0.05,
|
|
280
|
+
lfc_threshold=0.5, # log2 fold change threshold
|
|
281
|
+
device="cuda", # GPU acceleration
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
# Access results
|
|
285
|
+
print(results.aggregated_metrics) # Summary across all contexts
|
|
286
|
+
print(results.expanded_metrics) # Per-context metrics
|
|
287
|
+
print(results.deg_summary) # DEG counts per context
|
|
288
|
+
|
|
289
|
+
# Save results with plots
|
|
290
|
+
results.save("deg_evaluation/")
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
#### Per-Context Evaluation
|
|
294
|
+
|
|
295
|
+
When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
|
|
296
|
+
|
|
297
|
+
| Context | n_DEGs | W1 (DEGs only) | MMD (DEGs only) |
|
|
298
|
+
|---------|--------|----------------|-----------------|
|
|
299
|
+
| TypeA_drug1 | 234 | 0.42 | 0.031 |
|
|
300
|
+
| TypeA_drug2 | 189 | 0.38 | 0.027 |
|
|
301
|
+
| TypeB_drug1 | 312 | 0.51 | 0.045 |
|
|
302
|
+
|
|
303
|
+
If only the `perturbation` column is provided, evaluation is done per perturbation.
|
|
304
|
+
|
|
305
|
+
#### Available DEG Methods
|
|
306
|
+
|
|
307
|
+
| Method | Description | Speed |
|
|
308
|
+
|--------|-------------|-------|
|
|
309
|
+
| `welch` | Welch's t-test (unequal variance) | ⚡ Fast |
|
|
310
|
+
| `student` | Student's t-test (equal variance) | ⚡ Fast |
|
|
311
|
+
| `wilcoxon` | Wilcoxon rank-sum (non-parametric) | 🐢 Slower |
|
|
312
|
+
| `logfc` | Log fold change only (no p-value) | ⚡⚡ Fastest |
|
|
313
|
+
|
|
314
|
+
#### Visualization
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
from geneval.deg import (
|
|
318
|
+
plot_deg_distributions,
|
|
319
|
+
plot_context_heatmap,
|
|
320
|
+
plot_deg_counts,
|
|
321
|
+
create_deg_report,
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
# Distribution of metrics across contexts
|
|
325
|
+
plot_deg_distributions(results, save_path="dist.png")
|
|
326
|
+
|
|
327
|
+
# Heatmap: context × metric
|
|
328
|
+
plot_context_heatmap(results, save_path="heatmap.png")
|
|
329
|
+
|
|
330
|
+
# DEG counts per context (up/down regulated)
|
|
331
|
+
plot_deg_counts(results, save_path="deg_counts.png")
|
|
332
|
+
|
|
333
|
+
# Generate comprehensive report
|
|
334
|
+
create_deg_report(results, "report/", include_plots=True)
|
|
335
|
+
```
|
|
336
|
+
|
|
176
337
|
## Expected Data Format
|
|
177
338
|
|
|
178
339
|
GenEval expects AnnData (h5ad) files with:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
geneval/__init__.py,sha256=
|
|
1
|
+
geneval/__init__.py,sha256=1ENlptAErFX1ThLDuO8J5Hs0ko5gIxGGVq7PZUhBUKY,5418
|
|
2
2
|
geneval/cli.py,sha256=0ai0IGyn3SSmEnfLRJhcr0brvUxuNZHE4IXod7jvosU,9977
|
|
3
3
|
geneval/config.py,sha256=gkCjs_gzPWgUZNcmSR3Y70XQCAZ1m9AKLueaM-x8bvw,3729
|
|
4
4
|
geneval/core.py,sha256=No0DP8bNR6LedfCWEedY9C5r_c4M14rvSPaGZqbxc94,1155
|
|
@@ -6,12 +6,18 @@ geneval/data/__init__.py,sha256=NQUPVpUnBIabrTH5TuRk0KE9S7sVO5QetZv-MCQmZuw,827
|
|
|
6
6
|
geneval/data/gene_expression_datamodule.py,sha256=XiBIdf68JZ-3S-FaZsrQlBJA7qL9uUXo2C8y0r4an5M,8009
|
|
7
7
|
geneval/data/lazy_loader.py,sha256=5fTRVjPjcWvYXV-uPWFUF2Nn9rHRdD8lygAUkCW8wOM,20677
|
|
8
8
|
geneval/data/loader.py,sha256=zpRmwGZ4PJkB3rpXXRCMFtvMi4qvUrPkKmvIlGjfRpY,14555
|
|
9
|
-
geneval/
|
|
9
|
+
geneval/deg/__init__.py,sha256=joH816k_UWvu2qVhWb-fTbMQTmAhz4nUvt6yraziRek,1499
|
|
10
|
+
geneval/deg/context.py,sha256=_9gnWnRqqCZUDlegV2sT_rQrw8OeP1TIE9NZjNcI0ig,9069
|
|
11
|
+
geneval/deg/detection.py,sha256=gDdHOyFLOfl_B0xutS3KVFy53sreJ19N33B0RRI01wo,18119
|
|
12
|
+
geneval/deg/evaluator.py,sha256=MiBT2GOXUwq9rxHVAnJOVSbybX0rVgTsSDvOeJtnanE,18570
|
|
13
|
+
geneval/deg/visualization.py,sha256=9lWW9vRH_FbkIjJrf1MPobU1Yu_CAh6aw60S7g2Qe2k,10448
|
|
14
|
+
geneval/evaluator.py,sha256=WgdrgqOcGYT35k1keiFEIIRIj2CQaD2DsmBpq9hcLrI,13440
|
|
10
15
|
geneval/evaluators/__init__.py,sha256=i11sHvhsjEAeI3Aw9zFTPmCYuqkGxzTHggAKehe3HQ0,160
|
|
11
16
|
geneval/evaluators/base_evaluator.py,sha256=yJL568HdNofIcHgNOElSQMVlG9oRPTTDIZ7CmKccRqs,5967
|
|
12
17
|
geneval/evaluators/gene_expression_evaluator.py,sha256=v8QL6tzOQ3QVXdPMM8tFHTTviZC3WsPRX4G0ShgeDUw,8743
|
|
13
18
|
geneval/lazy_evaluator.py,sha256=I_VvDolxPFGiW38eGPrjSoBOKICKyYN3GHbjJBAe5tg,13200
|
|
14
|
-
geneval/metrics/__init__.py,sha256=
|
|
19
|
+
geneval/metrics/__init__.py,sha256=yVlNcFxfudOE4q-Y1VNJIXw1HrM70LkxocJgg3Cp7vo,2359
|
|
20
|
+
geneval/metrics/accelerated.py,sha256=iVxXg1Bf4aAeh-0kz7JRZS1I7xHHy9vNRozGDmCY_QY,27364
|
|
15
21
|
geneval/metrics/base_metric.py,sha256=prbnB-Ap-P64m-2_TUrHxO3NFQaw-obVg1Tw4pjC5EY,6961
|
|
16
22
|
geneval/metrics/correlation.py,sha256=jpYmaihWK89J1E5yQinGUJeB6pTZ21xPNHJi3XYyXJE,6987
|
|
17
23
|
geneval/metrics/distances.py,sha256=9mWzbMbIBY1ckOd2a0l3by3aEFMQZL9bVMSeP44xzUg,16155
|
|
@@ -27,8 +33,8 @@ geneval/utils/preprocessing.py,sha256=1Cij1O2dwDR6_zh5IEgLPq3jEmV8VfIRjfQrHiKe3M
|
|
|
27
33
|
geneval/visualization/__init__.py,sha256=LN19jl5xV4WVJTePaOUHWvKZ_pgDFp1chhcklGkNtm8,792
|
|
28
34
|
geneval/visualization/plots.py,sha256=3K94r3x5NjIUZ-hYVQIivO63VkLOvDWl-BLB_qL2pSY,15008
|
|
29
35
|
geneval/visualization/visualizer.py,sha256=lX7K0j20nAsgdtOOdbxLdLKYAfovEp3hNAnZOjFTCq0,36670
|
|
30
|
-
gengeneeval-0.
|
|
31
|
-
gengeneeval-0.
|
|
32
|
-
gengeneeval-0.
|
|
33
|
-
gengeneeval-0.
|
|
34
|
-
gengeneeval-0.2.1.dist-info/RECORD,,
|
|
36
|
+
gengeneeval-0.4.0.dist-info/METADATA,sha256=R3GI2E_z6qC1olM0D3aPKrJ3yjQDf_9-GncDqvNhwMY,12879
|
|
37
|
+
gengeneeval-0.4.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
38
|
+
gengeneeval-0.4.0.dist-info/entry_points.txt,sha256=xTkwnNa2fP0w1uGVsafzRTaCeuBSWLlNO-1CN8uBSK0,43
|
|
39
|
+
gengeneeval-0.4.0.dist-info/licenses/LICENSE,sha256=RDHgHDI4rSDq35R4CAC3npy86YUnmZ81ecO7aHfmmGA,1073
|
|
40
|
+
gengeneeval-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|