gengeneeval 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geneval/__init__.py +52 -1
- geneval/data/__init__.py +14 -0
- geneval/data/lazy_loader.py +562 -0
- geneval/evaluator.py +46 -0
- geneval/lazy_evaluator.py +424 -0
- geneval/metrics/__init__.py +25 -0
- geneval/metrics/accelerated.py +857 -0
- {gengeneeval-0.2.0.dist-info → gengeneeval-0.3.0.dist-info}/METADATA +111 -4
- {gengeneeval-0.2.0.dist-info → gengeneeval-0.3.0.dist-info}/RECORD +12 -9
- {gengeneeval-0.2.0.dist-info → gengeneeval-0.3.0.dist-info}/WHEEL +0 -0
- {gengeneeval-0.2.0.dist-info → gengeneeval-0.3.0.dist-info}/entry_points.txt +0 -0
- {gengeneeval-0.2.0.dist-info → gengeneeval-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gengeneeval
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, and publication-quality visualizations.
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
7
|
-
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking
|
|
7
|
+
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient
|
|
8
8
|
Author: GenEval Team
|
|
9
9
|
Author-email: geneval@example.com
|
|
10
10
|
Requires-Python: >=3.8,<4.0
|
|
@@ -24,6 +24,7 @@ Provides-Extra: full
|
|
|
24
24
|
Provides-Extra: gpu
|
|
25
25
|
Requires-Dist: anndata (>=0.8.0)
|
|
26
26
|
Requires-Dist: geomloss (>=0.2.1) ; extra == "full" or extra == "gpu"
|
|
27
|
+
Requires-Dist: joblib (>=1.0.0)
|
|
27
28
|
Requires-Dist: matplotlib (>=3.5.0)
|
|
28
29
|
Requires-Dist: numpy (>=1.21.0)
|
|
29
30
|
Requires-Dist: pandas (>=1.3.0)
|
|
@@ -46,7 +47,7 @@ Description-Content-Type: text/markdown
|
|
|
46
47
|
|
|
47
48
|
**Comprehensive evaluation of generated gene expression data against real datasets.**
|
|
48
49
|
|
|
49
|
-
GenEval is a modular, object-oriented Python framework for computing metrics between real and generated gene expression datasets stored in AnnData (h5ad) format. It supports condition-based matching, train/test splits, and generates publication-quality visualizations.
|
|
50
|
+
GenEval is a modular, object-oriented Python framework for computing metrics between real and generated gene expression datasets stored in AnnData (h5ad) format. It supports condition-based matching, train/test splits, and memory-efficient lazy loading for large datasets, and it generates publication-quality visualizations.
|
|
50
51
|
|
|
51
52
|
## Features
|
|
52
53
|
|
|
@@ -77,6 +78,10 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
|
|
|
77
78
|
- ✅ Condition-based matching (perturbation, cell type, etc.)
|
|
78
79
|
- ✅ Train/test split support
|
|
79
80
|
- ✅ Per-gene and aggregate metrics
|
|
81
|
+
- ✅ **Memory-efficient lazy loading** for large datasets
|
|
82
|
+
- ✅ **Batched evaluation** to avoid OOM errors
|
|
83
|
+
- ✅ **CPU parallelization** via joblib (multi-core speedup)
|
|
84
|
+
- ✅ **GPU acceleration** via PyTorch (10-100x speedup)
|
|
80
85
|
- ✅ Modular, extensible architecture
|
|
81
86
|
- ✅ Command-line interface
|
|
82
87
|
- ✅ Publication-quality visualizations
|
|
@@ -140,6 +145,108 @@ geneval --real real.h5ad --generated generated.h5ad \
|
|
|
140
145
|
--output results/
|
|
141
146
|
```
|
|
142
147
|
|
|
148
|
+
### Memory-Efficient Mode (for Large Datasets)
|
|
149
|
+
|
|
150
|
+
For datasets too large to fit in memory, use the lazy evaluation API:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from geneval import evaluate_lazy, load_data_lazy
|
|
154
|
+
|
|
155
|
+
# Memory-efficient evaluation (streams data one condition at a time)
|
|
156
|
+
results = evaluate_lazy(
|
|
157
|
+
real_path="large_real.h5ad",
|
|
158
|
+
generated_path="large_generated.h5ad",
|
|
159
|
+
condition_columns=["perturbation"],
|
|
160
|
+
batch_size=256, # Process in batches
|
|
161
|
+
use_backed=True, # Memory-mapped file access
|
|
162
|
+
output_dir="eval_output/",
|
|
163
|
+
save_per_condition=True, # Save each condition to disk
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# Get summary statistics
|
|
167
|
+
print(results.get_summary())
|
|
168
|
+
|
|
169
|
+
# Or use the lazy loader directly for custom workflows
|
|
170
|
+
with load_data_lazy("real.h5ad", "gen.h5ad", ["perturbation"]) as loader:
|
|
171
|
+
print(f"Memory estimate: {loader.estimate_memory_usage()}")
|
|
172
|
+
|
|
173
|
+
# Process one condition at a time
|
|
174
|
+
for key, real, gen, info in loader.iterate_conditions():
|
|
175
|
+
# Your custom evaluation logic
|
|
176
|
+
pass
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Accelerated Evaluation (CPU Parallelization & GPU)
|
|
180
|
+
|
|
181
|
+
GenEval supports CPU parallelization and GPU acceleration for significant speedups:
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from geneval import evaluate, get_available_backends
|
|
185
|
+
|
|
186
|
+
# Check available backends
|
|
187
|
+
print(get_available_backends())
|
|
188
|
+
# {'joblib': True, 'torch': True, 'geomloss': True, 'cuda': True, 'mps': False}
|
|
189
|
+
|
|
190
|
+
# Parallel CPU evaluation (use all cores)
|
|
191
|
+
results = evaluate(
|
|
192
|
+
real_path="real.h5ad",
|
|
193
|
+
generated_path="generated.h5ad",
|
|
194
|
+
condition_columns=["perturbation"],
|
|
195
|
+
n_jobs=-1, # Use all available CPU cores
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# GPU-accelerated evaluation
|
|
199
|
+
results = evaluate(
|
|
200
|
+
real_path="real.h5ad",
|
|
201
|
+
generated_path="generated.h5ad",
|
|
202
|
+
condition_columns=["perturbation"],
|
|
203
|
+
device="cuda", # Use NVIDIA GPU
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
# Combined: parallel CPU + auto device selection
|
|
207
|
+
results = evaluate(..., n_jobs=8, device="auto")
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
#### Low-level Accelerated API
|
|
211
|
+
|
|
212
|
+
For custom workflows, use the accelerated metrics directly:
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
from geneval.metrics.accelerated import (
|
|
216
|
+
compute_metrics_accelerated,
|
|
217
|
+
GPUWasserstein1,
|
|
218
|
+
GPUMMD,
|
|
219
|
+
vectorized_wasserstein1,
|
|
220
|
+
)
|
|
221
|
+
import numpy as np
|
|
222
|
+
|
|
223
|
+
# Load your data
|
|
224
|
+
real = np.random.randn(1000, 5000) # 1000 cells, 5000 genes
|
|
225
|
+
generated = np.random.randn(1000, 5000)
|
|
226
|
+
|
|
227
|
+
# Compute multiple metrics with acceleration
|
|
228
|
+
results = compute_metrics_accelerated(
|
|
229
|
+
real, generated,
|
|
230
|
+
metrics=["wasserstein_1", "wasserstein_2", "mmd", "energy"],
|
|
231
|
+
n_jobs=8, # CPU parallelization
|
|
232
|
+
device="cuda", # GPU acceleration
|
|
233
|
+
verbose=True,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
# Access results
|
|
237
|
+
print(f"W1: {results['wasserstein_1'].aggregate_value:.4f}")
|
|
238
|
+
print(f"MMD: {results['mmd'].aggregate_value:.4f}")
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
#### Performance Tips
|
|
242
|
+
|
|
243
|
+
| Optimization | Speedup | When to Use |
|
|
244
|
+
|--------------|---------|-------------|
|
|
245
|
+
| `n_jobs=-1` (all cores) | 4-16x | Always (if joblib available) |
|
|
246
|
+
| `device="cuda"` | 10-100x | Large datasets, NVIDIA GPU available |
|
|
247
|
+
| `device="mps"` | 5-20x | Apple Silicon Macs |
|
|
248
|
+
| Vectorized NumPy | 2-5x | Automatic fallback |
|
|
249
|
+
|
|
143
250
|
## Expected Data Format
|
|
144
251
|
|
|
145
252
|
GenEval expects AnnData (h5ad) files with:
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
geneval/__init__.py,sha256=
|
|
1
|
+
geneval/__init__.py,sha256=K0E3Jyt3l7_KxqIeI3upBBBrjRA4ASdRFugaxMVVGRM,4306
|
|
2
2
|
geneval/cli.py,sha256=0ai0IGyn3SSmEnfLRJhcr0brvUxuNZHE4IXod7jvosU,9977
|
|
3
3
|
geneval/config.py,sha256=gkCjs_gzPWgUZNcmSR3Y70XQCAZ1m9AKLueaM-x8bvw,3729
|
|
4
4
|
geneval/core.py,sha256=No0DP8bNR6LedfCWEedY9C5r_c4M14rvSPaGZqbxc94,1155
|
|
5
|
-
geneval/data/__init__.py,sha256=
|
|
5
|
+
geneval/data/__init__.py,sha256=NQUPVpUnBIabrTH5TuRk0KE9S7sVO5QetZv-MCQmZuw,827
|
|
6
6
|
geneval/data/gene_expression_datamodule.py,sha256=XiBIdf68JZ-3S-FaZsrQlBJA7qL9uUXo2C8y0r4an5M,8009
|
|
7
|
+
geneval/data/lazy_loader.py,sha256=5fTRVjPjcWvYXV-uPWFUF2Nn9rHRdD8lygAUkCW8wOM,20677
|
|
7
8
|
geneval/data/loader.py,sha256=zpRmwGZ4PJkB3rpXXRCMFtvMi4qvUrPkKmvIlGjfRpY,14555
|
|
8
|
-
geneval/evaluator.py,sha256=
|
|
9
|
+
geneval/evaluator.py,sha256=WgdrgqOcGYT35k1keiFEIIRIj2CQaD2DsmBpq9hcLrI,13440
|
|
9
10
|
geneval/evaluators/__init__.py,sha256=i11sHvhsjEAeI3Aw9zFTPmCYuqkGxzTHggAKehe3HQ0,160
|
|
10
11
|
geneval/evaluators/base_evaluator.py,sha256=yJL568HdNofIcHgNOElSQMVlG9oRPTTDIZ7CmKccRqs,5967
|
|
11
12
|
geneval/evaluators/gene_expression_evaluator.py,sha256=v8QL6tzOQ3QVXdPMM8tFHTTviZC3WsPRX4G0ShgeDUw,8743
|
|
12
|
-
geneval/
|
|
13
|
+
geneval/lazy_evaluator.py,sha256=I_VvDolxPFGiW38eGPrjSoBOKICKyYN3GHbjJBAe5tg,13200
|
|
14
|
+
geneval/metrics/__init__.py,sha256=yVlNcFxfudOE4q-Y1VNJIXw1HrM70LkxocJgg3Cp7vo,2359
|
|
15
|
+
geneval/metrics/accelerated.py,sha256=iVxXg1Bf4aAeh-0kz7JRZS1I7xHHy9vNRozGDmCY_QY,27364
|
|
13
16
|
geneval/metrics/base_metric.py,sha256=prbnB-Ap-P64m-2_TUrHxO3NFQaw-obVg1Tw4pjC5EY,6961
|
|
14
17
|
geneval/metrics/correlation.py,sha256=jpYmaihWK89J1E5yQinGUJeB6pTZ21xPNHJi3XYyXJE,6987
|
|
15
18
|
geneval/metrics/distances.py,sha256=9mWzbMbIBY1ckOd2a0l3by3aEFMQZL9bVMSeP44xzUg,16155
|
|
@@ -25,8 +28,8 @@ geneval/utils/preprocessing.py,sha256=1Cij1O2dwDR6_zh5IEgLPq3jEmV8VfIRjfQrHiKe3M
|
|
|
25
28
|
geneval/visualization/__init__.py,sha256=LN19jl5xV4WVJTePaOUHWvKZ_pgDFp1chhcklGkNtm8,792
|
|
26
29
|
geneval/visualization/plots.py,sha256=3K94r3x5NjIUZ-hYVQIivO63VkLOvDWl-BLB_qL2pSY,15008
|
|
27
30
|
geneval/visualization/visualizer.py,sha256=lX7K0j20nAsgdtOOdbxLdLKYAfovEp3hNAnZOjFTCq0,36670
|
|
28
|
-
gengeneeval-0.
|
|
29
|
-
gengeneeval-0.
|
|
30
|
-
gengeneeval-0.
|
|
31
|
-
gengeneeval-0.
|
|
32
|
-
gengeneeval-0.
|
|
31
|
+
gengeneeval-0.3.0.dist-info/METADATA,sha256=5K2bIh59OEM88dNVeUWPOevyyAbnAIyiKaZu6VmJIh0,9680
|
|
32
|
+
gengeneeval-0.3.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
33
|
+
gengeneeval-0.3.0.dist-info/entry_points.txt,sha256=xTkwnNa2fP0w1uGVsafzRTaCeuBSWLlNO-1CN8uBSK0,43
|
|
34
|
+
gengeneeval-0.3.0.dist-info/licenses/LICENSE,sha256=RDHgHDI4rSDq35R4CAC3npy86YUnmZ81ecO7aHfmmGA,1073
|
|
35
|
+
gengeneeval-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|