gengeneeval 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/PKG-INFO +125 -3
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/README.md +122 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/pyproject.toml +3 -3
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/__init__.py +43 -1
- gengeneeval-0.4.1/src/geneval/deg/__init__.py +69 -0
- gengeneeval-0.4.1/src/geneval/deg/context.py +271 -0
- gengeneeval-0.4.1/src/geneval/deg/detection.py +578 -0
- gengeneeval-0.4.1/src/geneval/deg/evaluator.py +821 -0
- gengeneeval-0.4.1/src/geneval/deg/visualization.py +376 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/LICENSE +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/cli.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/config.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/core.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/data/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/data/gene_expression_datamodule.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/data/lazy_loader.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/data/loader.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/evaluator.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/evaluators/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/evaluators/base_evaluator.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/evaluators/gene_expression_evaluator.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/lazy_evaluator.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/accelerated.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/base_metric.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/correlation.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/distances.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/metrics.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/metrics/reconstruction.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/models/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/models/base_model.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/results.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/testing.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/utils/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/utils/io.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/utils/preprocessing.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/visualization/__init__.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/visualization/plots.py +0 -0
- {gengeneeval-0.3.0 → gengeneeval-0.4.1}/src/geneval/visualization/visualizer.py +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gengeneeval
|
|
3
|
-
Version: 0.3.0
|
|
4
|
-
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
7
|
-
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient
|
|
7
|
+
Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient,DEG,perturbation
|
|
8
8
|
Author: GenEval Team
|
|
9
9
|
Author-email: geneval@example.com
|
|
10
10
|
Requires-Python: >=3.8,<4.0
|
|
@@ -78,6 +78,8 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
|
|
|
78
78
|
- ✅ Condition-based matching (perturbation, cell type, etc.)
|
|
79
79
|
- ✅ Train/test split support
|
|
80
80
|
- ✅ Per-gene and aggregate metrics
|
|
81
|
+
- ✅ **DEG-focused evaluation** with per-context (covariate × perturbation) support
|
|
82
|
+
- ✅ **Fast DEG detection** via vectorized Welch's t-test, Student's t-test, and Wilcoxon rank-sum
|
|
81
83
|
- ✅ **Memory-efficient lazy loading** for large datasets
|
|
82
84
|
- ✅ **Batched evaluation** to avoid OOM errors
|
|
83
85
|
- ✅ **CPU parallelization** via joblib (multi-core speedup)
|
|
@@ -247,6 +249,126 @@ print(f"MMD: {results['mmd'].aggregate_value:.4f}")
|
|
|
247
249
|
| `device="mps"` | 5-20x | Apple Silicon Macs |
|
|
248
250
|
| Vectorized NumPy | 2-5x | Automatic fallback |
|
|
249
251
|
|
|
252
|
+
### DEG-Focused Evaluation
|
|
253
|
+
|
|
254
|
+
GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, computing metrics on the biologically relevant DEGs and, by default, on all genes as well so the two can be compared. Focusing on DEGs provides a more meaningful evaluation for perturbation prediction tasks.
|
|
255
|
+
|
|
256
|
+
#### Key Features
|
|
257
|
+
|
|
258
|
+
- **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
|
|
259
|
+
- **DEG vs all-genes comparison**: Compute metrics on both and compare
|
|
260
|
+
- **Flexible DEG selection**: Top N by significance, or threshold-based filtering
|
|
261
|
+
- **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
|
|
262
|
+
- **GPU acceleration**: DEG detection and metrics on GPU for large datasets
|
|
263
|
+
- **Comprehensive reporting**: Aggregated and expanded results with visualizations
|
|
264
|
+
|
|
265
|
+
#### Quick Start
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
from geneval import evaluate_degs
|
|
269
|
+
import pandas as pd
|
|
270
|
+
|
|
271
|
+
# Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
|
|
272
|
+
results = evaluate_degs(
|
|
273
|
+
real_data=real_adata.X, # (n_samples, n_genes)
|
|
274
|
+
generated_data=gen_adata.X,
|
|
275
|
+
real_obs=real_adata.obs,
|
|
276
|
+
generated_obs=gen_adata.obs,
|
|
277
|
+
condition_columns=["cell_type", "perturbation"], # Context columns
|
|
278
|
+
control_key="control", # Value indicating control samples
|
|
279
|
+
perturbation_column="perturbation",
|
|
280
|
+
deg_method="welch", # or "student", "wilcoxon", "logfc"
|
|
281
|
+
pval_threshold=0.05, # Significance threshold
|
|
282
|
+
lfc_threshold=0.5, # log2 fold change threshold
|
|
283
|
+
compute_all_genes=True, # Also compute metrics on all genes
|
|
284
|
+
device="cuda", # GPU acceleration
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
# Compare DEG-only vs all-genes metrics
|
|
288
|
+
print(results.comparison_summary)
|
|
289
|
+
# metric deg_mean all_mean difference ratio
|
|
290
|
+
# wasserstein_1 5.34 0.69 4.65 7.74
|
|
291
|
+
# mmd 1.14 0.13 1.02 9.00
|
|
292
|
+
|
|
293
|
+
# Access per-context results
|
|
294
|
+
print(results.expanded_metrics) # Has deg_* and all_* columns
|
|
295
|
+
print(results.deg_summary) # DEG counts per context
|
|
296
|
+
|
|
297
|
+
# Save results with plots
|
|
298
|
+
results.save("deg_evaluation/")
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
#### DEG Selection Control
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
# Option 1: Top N most significant DEGs
|
|
305
|
+
results = evaluate_degs(
|
|
306
|
+
...,
|
|
307
|
+
n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
# Option 2: Stricter thresholds
|
|
311
|
+
results = evaluate_degs(
|
|
312
|
+
...,
|
|
313
|
+
pval_threshold=0.01, # More stringent p-value
|
|
314
|
+
lfc_threshold=1.0, # 2-fold change minimum
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
# Option 3: DEGs only (skip all-genes metrics for speed)
|
|
318
|
+
results = evaluate_degs(
|
|
319
|
+
...,
|
|
320
|
+
compute_all_genes=False,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
# Get DEG-only or all-genes metrics separately
|
|
324
|
+
deg_only = results.get_deg_only_metrics()
|
|
325
|
+
all_genes = results.get_all_genes_metrics()
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
#### Per-Context Evaluation
|
|
329
|
+
|
|
330
|
+
When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
|
|
331
|
+
|
|
332
|
+
| Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
|
|
333
|
+
|---------|--------|--------|--------|---------|---------|
|
|
334
|
+
| TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
|
|
335
|
+
| TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
|
|
336
|
+
| TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
|
|
337
|
+
|
|
338
|
+
If only the `perturbation` column is provided, evaluation is done per perturbation.
|
|
339
|
+
|
|
340
|
+
#### Available DEG Methods
|
|
341
|
+
|
|
342
|
+
| Method | Description | Speed |
|
|
343
|
+
|--------|-------------|-------|
|
|
344
|
+
| `welch` | Welch's t-test (unequal variance) | ⚡ Fast |
|
|
345
|
+
| `student` | Student's t-test (equal variance) | ⚡ Fast |
|
|
346
|
+
| `wilcoxon` | Wilcoxon rank-sum (non-parametric) | 🐢 Slower |
|
|
347
|
+
| `logfc` | Log fold change only (no p-value) | ⚡⚡ Fastest |
|
|
348
|
+
|
|
349
|
+
#### Visualization
|
|
350
|
+
|
|
351
|
+
```python
|
|
352
|
+
from geneval.deg import (
|
|
353
|
+
plot_deg_distributions,
|
|
354
|
+
plot_context_heatmap,
|
|
355
|
+
plot_deg_counts,
|
|
356
|
+
create_deg_report,
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
# Distribution of metrics across contexts
|
|
360
|
+
plot_deg_distributions(results, save_path="dist.png")
|
|
361
|
+
|
|
362
|
+
# Heatmap: context × metric
|
|
363
|
+
plot_context_heatmap(results, save_path="heatmap.png")
|
|
364
|
+
|
|
365
|
+
# DEG counts per context (up/down regulated)
|
|
366
|
+
plot_deg_counts(results, save_path="deg_counts.png")
|
|
367
|
+
|
|
368
|
+
# Generate comprehensive report
|
|
369
|
+
create_deg_report(results, "report/", include_plots=True)
|
|
370
|
+
```
|
|
371
|
+
|
|
250
372
|
## Expected Data Format
|
|
251
373
|
|
|
252
374
|
GenEval expects AnnData (h5ad) files with:
|
|
@@ -38,6 +38,8 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
|
|
|
38
38
|
- ✅ Condition-based matching (perturbation, cell type, etc.)
|
|
39
39
|
- ✅ Train/test split support
|
|
40
40
|
- ✅ Per-gene and aggregate metrics
|
|
41
|
+
- ✅ **DEG-focused evaluation** with per-context (covariate × perturbation) support
|
|
42
|
+
- ✅ **Fast DEG detection** via vectorized Welch's t-test, Student's t-test, and Wilcoxon rank-sum
|
|
41
43
|
- ✅ **Memory-efficient lazy loading** for large datasets
|
|
42
44
|
- ✅ **Batched evaluation** to avoid OOM errors
|
|
43
45
|
- ✅ **CPU parallelization** via joblib (multi-core speedup)
|
|
@@ -207,6 +209,126 @@ print(f"MMD: {results['mmd'].aggregate_value:.4f}")
|
|
|
207
209
|
| `device="mps"` | 5-20x | Apple Silicon Macs |
|
|
208
210
|
| Vectorized NumPy | 2-5x | Automatic fallback |
|
|
209
211
|
|
|
212
|
+
### DEG-Focused Evaluation
|
|
213
|
+
|
|
214
|
+
GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, computing metrics on the biologically relevant DEGs and, by default, on all genes as well so the two can be compared. Focusing on DEGs provides a more meaningful evaluation for perturbation prediction tasks.
|
|
215
|
+
|
|
216
|
+
#### Key Features
|
|
217
|
+
|
|
218
|
+
- **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
|
|
219
|
+
- **DEG vs all-genes comparison**: Compute metrics on both and compare
|
|
220
|
+
- **Flexible DEG selection**: Top N by significance, or threshold-based filtering
|
|
221
|
+
- **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
|
|
222
|
+
- **GPU acceleration**: DEG detection and metrics on GPU for large datasets
|
|
223
|
+
- **Comprehensive reporting**: Aggregated and expanded results with visualizations
|
|
224
|
+
|
|
225
|
+
#### Quick Start
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from geneval import evaluate_degs
|
|
229
|
+
import pandas as pd
|
|
230
|
+
|
|
231
|
+
# Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
|
|
232
|
+
results = evaluate_degs(
|
|
233
|
+
real_data=real_adata.X, # (n_samples, n_genes)
|
|
234
|
+
generated_data=gen_adata.X,
|
|
235
|
+
real_obs=real_adata.obs,
|
|
236
|
+
generated_obs=gen_adata.obs,
|
|
237
|
+
condition_columns=["cell_type", "perturbation"], # Context columns
|
|
238
|
+
control_key="control", # Value indicating control samples
|
|
239
|
+
perturbation_column="perturbation",
|
|
240
|
+
deg_method="welch", # or "student", "wilcoxon", "logfc"
|
|
241
|
+
pval_threshold=0.05, # Significance threshold
|
|
242
|
+
lfc_threshold=0.5, # log2 fold change threshold
|
|
243
|
+
compute_all_genes=True, # Also compute metrics on all genes
|
|
244
|
+
device="cuda", # GPU acceleration
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
# Compare DEG-only vs all-genes metrics
|
|
248
|
+
print(results.comparison_summary)
|
|
249
|
+
# metric deg_mean all_mean difference ratio
|
|
250
|
+
# wasserstein_1 5.34 0.69 4.65 7.74
|
|
251
|
+
# mmd 1.14 0.13 1.02 9.00
|
|
252
|
+
|
|
253
|
+
# Access per-context results
|
|
254
|
+
print(results.expanded_metrics) # Has deg_* and all_* columns
|
|
255
|
+
print(results.deg_summary) # DEG counts per context
|
|
256
|
+
|
|
257
|
+
# Save results with plots
|
|
258
|
+
results.save("deg_evaluation/")
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
#### DEG Selection Control
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
# Option 1: Top N most significant DEGs
|
|
265
|
+
results = evaluate_degs(
|
|
266
|
+
...,
|
|
267
|
+
n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Option 2: Stricter thresholds
|
|
271
|
+
results = evaluate_degs(
|
|
272
|
+
...,
|
|
273
|
+
pval_threshold=0.01, # More stringent p-value
|
|
274
|
+
lfc_threshold=1.0, # 2-fold change minimum
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
# Option 3: DEGs only (skip all-genes metrics for speed)
|
|
278
|
+
results = evaluate_degs(
|
|
279
|
+
...,
|
|
280
|
+
compute_all_genes=False,
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Get DEG-only or all-genes metrics separately
|
|
284
|
+
deg_only = results.get_deg_only_metrics()
|
|
285
|
+
all_genes = results.get_all_genes_metrics()
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
#### Per-Context Evaluation
|
|
289
|
+
|
|
290
|
+
When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
|
|
291
|
+
|
|
292
|
+
| Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
|
|
293
|
+
|---------|--------|--------|--------|---------|---------|
|
|
294
|
+
| TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
|
|
295
|
+
| TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
|
|
296
|
+
| TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
|
|
297
|
+
|
|
298
|
+
If only the `perturbation` column is provided, evaluation is done per perturbation.
|
|
299
|
+
|
|
300
|
+
#### Available DEG Methods
|
|
301
|
+
|
|
302
|
+
| Method | Description | Speed |
|
|
303
|
+
|--------|-------------|-------|
|
|
304
|
+
| `welch` | Welch's t-test (unequal variance) | ⚡ Fast |
|
|
305
|
+
| `student` | Student's t-test (equal variance) | ⚡ Fast |
|
|
306
|
+
| `wilcoxon` | Wilcoxon rank-sum (non-parametric) | 🐢 Slower |
|
|
307
|
+
| `logfc` | Log fold change only (no p-value) | ⚡⚡ Fastest |
|
|
308
|
+
|
|
309
|
+
#### Visualization
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
from geneval.deg import (
|
|
313
|
+
plot_deg_distributions,
|
|
314
|
+
plot_context_heatmap,
|
|
315
|
+
plot_deg_counts,
|
|
316
|
+
create_deg_report,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
# Distribution of metrics across contexts
|
|
320
|
+
plot_deg_distributions(results, save_path="dist.png")
|
|
321
|
+
|
|
322
|
+
# Heatmap: context × metric
|
|
323
|
+
plot_context_heatmap(results, save_path="heatmap.png")
|
|
324
|
+
|
|
325
|
+
# DEG counts per context (up/down regulated)
|
|
326
|
+
plot_deg_counts(results, save_path="deg_counts.png")
|
|
327
|
+
|
|
328
|
+
# Generate comprehensive report
|
|
329
|
+
create_deg_report(results, "report/", include_plots=True)
|
|
330
|
+
```
|
|
331
|
+
|
|
210
332
|
## Expected Data Format
|
|
211
333
|
|
|
212
334
|
GenEval expects AnnData (h5ad) files with:
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "gengeneeval"
|
|
3
|
-
version = "0.3.0"
|
|
4
|
-
description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations."
|
|
3
|
+
version = "0.4.1"
|
|
4
|
+
description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations."
|
|
5
5
|
authors = ["GenEval Team <geneval@example.com>"]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
|
8
8
|
homepage = "https://github.com/AndreaRubbi/GenGeneEval"
|
|
9
9
|
repository = "https://github.com/AndreaRubbi/GenGeneEval"
|
|
10
|
-
keywords = ["gene expression", "evaluation", "metrics", "single-cell", "generative models", "benchmarking", "memory-efficient"]
|
|
10
|
+
keywords = ["gene expression", "evaluation", "metrics", "single-cell", "generative models", "benchmarking", "memory-efficient", "DEG", "perturbation"]
|
|
11
11
|
classifiers = [
|
|
12
12
|
"Development Status :: 4 - Beta",
|
|
13
13
|
"Intended Audience :: Science/Research",
|
|
@@ -7,8 +7,10 @@ and generated gene expression datasets stored in AnnData (h5ad) format.
|
|
|
7
7
|
Features:
|
|
8
8
|
- Multiple distance and correlation metrics (per-gene and aggregate)
|
|
9
9
|
- Condition-based matching (perturbation, cell type, etc.)
|
|
10
|
+
- DEG-focused evaluation with per-context (covariate × perturbation) support
|
|
10
11
|
- Train/test split support
|
|
11
12
|
- Memory-efficient lazy loading for large datasets
|
|
13
|
+
- CPU parallelization and GPU acceleration
|
|
12
14
|
- Publication-quality visualizations
|
|
13
15
|
- Command-line interface
|
|
14
16
|
|
|
@@ -21,6 +23,17 @@ Quick Start:
|
|
|
21
23
|
... output_dir="output/"
|
|
22
24
|
... )
|
|
23
25
|
|
|
26
|
+
DEG-Focused Evaluation:
|
|
27
|
+
>>> from geneval import evaluate_degs
|
|
28
|
+
>>> results = evaluate_degs(
|
|
29
|
+
... real_data, generated_data,
|
|
30
|
+
... real_obs, generated_obs,
|
|
31
|
+
... condition_columns=["cell_type", "perturbation"],
|
|
32
|
+
... control_key="control",
|
|
33
|
+
... deg_method="welch",
|
|
34
|
+
... device="cuda", # GPU acceleration
|
|
35
|
+
... )
|
|
36
|
+
|
|
24
37
|
Memory-Efficient Mode (for large datasets):
|
|
25
38
|
>>> from geneval import evaluate_lazy
|
|
26
39
|
>>> results = evaluate_lazy(
|
|
@@ -36,7 +49,7 @@ CLI Usage:
|
|
|
36
49
|
--conditions perturbation cell_type --output results/
|
|
37
50
|
"""
|
|
38
51
|
|
|
39
|
-
__version__ = "0.3.0"
|
|
52
|
+
__version__ = "0.4.1"
|
|
40
53
|
__author__ = "GenEval Team"
|
|
41
54
|
|
|
42
55
|
# Main evaluation interface
|
|
@@ -109,6 +122,22 @@ from .metrics.accelerated import (
|
|
|
109
122
|
compute_metrics_accelerated,
|
|
110
123
|
)
|
|
111
124
|
|
|
125
|
+
# DEG-focused evaluation
|
|
126
|
+
from .deg import (
|
|
127
|
+
DEGEvaluator,
|
|
128
|
+
DEGResult,
|
|
129
|
+
DEGEvaluationResult,
|
|
130
|
+
ContextEvaluator,
|
|
131
|
+
ContextResult,
|
|
132
|
+
compute_degs_fast,
|
|
133
|
+
compute_degs_gpu,
|
|
134
|
+
get_contexts,
|
|
135
|
+
plot_deg_distributions,
|
|
136
|
+
plot_context_heatmap,
|
|
137
|
+
create_deg_report,
|
|
138
|
+
)
|
|
139
|
+
from .deg.evaluator import evaluate_degs
|
|
140
|
+
|
|
112
141
|
# Visualization
|
|
113
142
|
from .visualization.visualizer import (
|
|
114
143
|
EvaluationVisualizer,
|
|
@@ -174,6 +203,19 @@ __all__ = [
|
|
|
174
203
|
"ParallelMetricComputer",
|
|
175
204
|
"get_available_backends",
|
|
176
205
|
"compute_metrics_accelerated",
|
|
206
|
+
# DEG evaluation
|
|
207
|
+
"DEGEvaluator",
|
|
208
|
+
"DEGResult",
|
|
209
|
+
"DEGEvaluationResult",
|
|
210
|
+
"ContextEvaluator",
|
|
211
|
+
"ContextResult",
|
|
212
|
+
"compute_degs_fast",
|
|
213
|
+
"compute_degs_gpu",
|
|
214
|
+
"evaluate_degs",
|
|
215
|
+
"get_contexts",
|
|
216
|
+
"plot_deg_distributions",
|
|
217
|
+
"plot_context_heatmap",
|
|
218
|
+
"create_deg_report",
|
|
177
219
|
# Visualization
|
|
178
220
|
"EvaluationVisualizer",
|
|
179
221
|
"visualize",
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Differentially Expressed Genes (DEG) module for GenGeneEval.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- Fast DEG detection using vectorized statistical tests
|
|
6
|
+
- Per-context evaluation (covariates × perturbations)
|
|
7
|
+
- DEG-focused metrics computation
|
|
8
|
+
- Integration with GPU acceleration
|
|
9
|
+
|
|
10
|
+
Example usage:
|
|
11
|
+
>>> from geneval.deg import DEGEvaluator, compute_degs_fast
|
|
12
|
+
>>> degs = compute_degs_fast(control_data, perturbed_data, method="welch")
|
|
13
|
+
>>> evaluator = DEGEvaluator(loader, deg_method="welch", pval_threshold=0.05)
|
|
14
|
+
>>> results = evaluator.evaluate()
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .detection import (
|
|
18
|
+
compute_degs_fast,
|
|
19
|
+
compute_degs_gpu,
|
|
20
|
+
compute_degs_auto,
|
|
21
|
+
DEGResult,
|
|
22
|
+
DEGMethod,
|
|
23
|
+
)
|
|
24
|
+
from .context import (
|
|
25
|
+
ContextEvaluator,
|
|
26
|
+
ContextResult,
|
|
27
|
+
get_contexts,
|
|
28
|
+
get_context_id,
|
|
29
|
+
filter_by_context,
|
|
30
|
+
)
|
|
31
|
+
from .evaluator import (
|
|
32
|
+
DEGEvaluator,
|
|
33
|
+
DEGEvaluationResult,
|
|
34
|
+
DEGSettings,
|
|
35
|
+
ContextMetrics,
|
|
36
|
+
evaluate_degs,
|
|
37
|
+
)
|
|
38
|
+
from .visualization import (
|
|
39
|
+
plot_deg_distributions,
|
|
40
|
+
plot_context_heatmap,
|
|
41
|
+
plot_deg_counts,
|
|
42
|
+
create_deg_report,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
# Detection
|
|
47
|
+
"compute_degs_fast",
|
|
48
|
+
"compute_degs_gpu",
|
|
49
|
+
"compute_degs_auto",
|
|
50
|
+
"DEGResult",
|
|
51
|
+
"DEGMethod",
|
|
52
|
+
# Context
|
|
53
|
+
"ContextEvaluator",
|
|
54
|
+
"ContextResult",
|
|
55
|
+
"get_contexts",
|
|
56
|
+
"get_context_id",
|
|
57
|
+
"filter_by_context",
|
|
58
|
+
# Evaluator
|
|
59
|
+
"DEGEvaluator",
|
|
60
|
+
"DEGEvaluationResult",
|
|
61
|
+
"DEGSettings",
|
|
62
|
+
"ContextMetrics",
|
|
63
|
+
"evaluate_degs",
|
|
64
|
+
# Visualization
|
|
65
|
+
"plot_deg_distributions",
|
|
66
|
+
"plot_context_heatmap",
|
|
67
|
+
"plot_deg_counts",
|
|
68
|
+
"create_deg_report",
|
|
69
|
+
]
|