gengeneeval 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/PKG-INFO +46 -11
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/README.md +45 -10
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/pyproject.toml +1 -1
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/__init__.py +1 -1
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/__init__.py +4 -0
- gengeneeval-0.4.1/src/geneval/deg/evaluator.py +821 -0
- gengeneeval-0.4.0/src/geneval/deg/evaluator.py +0 -538
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/LICENSE +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/cli.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/config.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/core.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/gene_expression_datamodule.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/lazy_loader.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/loader.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/context.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/detection.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/visualization.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluator.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/base_evaluator.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/gene_expression_evaluator.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/lazy_evaluator.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/accelerated.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/base_metric.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/correlation.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/distances.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/metrics.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/reconstruction.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/models/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/models/base_model.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/results.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/testing.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/io.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/preprocessing.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/__init__.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/plots.py +0 -0
- {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/visualizer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gengeneeval
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -256,6 +256,8 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
|
|
|
256
256
|
#### Key Features
|
|
257
257
|
|
|
258
258
|
- **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
|
|
259
|
+
- **DEG vs all-genes comparison**: Compute metrics on both and compare
|
|
260
|
+
- **Flexible DEG selection**: Top N by significance, or threshold-based filtering
|
|
259
261
|
- **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
|
|
260
262
|
- **GPU acceleration**: DEG detection and metrics on GPU for large datasets
|
|
261
263
|
- **Comprehensive reporting**: Aggregated and expanded results with visualizations
|
|
@@ -266,7 +268,7 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
|
|
|
266
268
|
from geneval import evaluate_degs
|
|
267
269
|
import pandas as pd
|
|
268
270
|
|
|
269
|
-
# Evaluate with DEG-focused metrics
|
|
271
|
+
# Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
|
|
270
272
|
results = evaluate_degs(
|
|
271
273
|
real_data=real_adata.X, # (n_samples, n_genes)
|
|
272
274
|
generated_data=gen_adata.X,
|
|
@@ -276,29 +278,62 @@ results = evaluate_degs(
|
|
|
276
278
|
control_key="control", # Value indicating control samples
|
|
277
279
|
perturbation_column="perturbation",
|
|
278
280
|
deg_method="welch", # or "student", "wilcoxon", "logfc"
|
|
279
|
-
pval_threshold=0.05,
|
|
281
|
+
pval_threshold=0.05, # Significance threshold
|
|
280
282
|
lfc_threshold=0.5, # log2 fold change threshold
|
|
283
|
+
compute_all_genes=True, # Also compute metrics on all genes
|
|
281
284
|
device="cuda", # GPU acceleration
|
|
282
285
|
)
|
|
283
286
|
|
|
284
|
-
#
|
|
285
|
-
print(results.
|
|
286
|
-
|
|
287
|
+
# Compare DEG-only vs all-genes metrics
|
|
288
|
+
print(results.comparison_summary)
|
|
289
|
+
# metric deg_mean all_mean difference ratio
|
|
290
|
+
# wasserstein_1 5.34 0.69 4.65 7.74
|
|
291
|
+
# mmd 1.14 0.13 1.02 9.00
|
|
292
|
+
|
|
293
|
+
# Access per-context results
|
|
294
|
+
print(results.expanded_metrics) # Has deg_* and all_* columns
|
|
287
295
|
print(results.deg_summary) # DEG counts per context
|
|
288
296
|
|
|
289
297
|
# Save results with plots
|
|
290
298
|
results.save("deg_evaluation/")
|
|
291
299
|
```
|
|
292
300
|
|
|
301
|
+
#### DEG Selection Control
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
# Option 1: Top N most significant DEGs
|
|
305
|
+
results = evaluate_degs(
|
|
306
|
+
...,
|
|
307
|
+
n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
# Option 2: Stricter thresholds
|
|
311
|
+
results = evaluate_degs(
|
|
312
|
+
...,
|
|
313
|
+
pval_threshold=0.01, # More stringent p-value
|
|
314
|
+
lfc_threshold=1.0, # 2-fold change minimum
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
# Option 3: DEGs only (skip all-genes metrics for speed)
|
|
318
|
+
results = evaluate_degs(
|
|
319
|
+
...,
|
|
320
|
+
compute_all_genes=False,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
# Get DEG-only or all-genes metrics separately
|
|
324
|
+
deg_only = results.get_deg_only_metrics()
|
|
325
|
+
all_genes = results.get_all_genes_metrics()
|
|
326
|
+
```
|
|
327
|
+
|
|
293
328
|
#### Per-Context Evaluation
|
|
294
329
|
|
|
295
330
|
When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
|
|
296
331
|
|
|
297
|
-
| Context | n_DEGs |
|
|
298
|
-
|---------|--------|
|
299
|
-
| TypeA_drug1 | 234 |
|
|
300
|
-
| TypeA_drug2 | 189 |
|
|
301
|
-
| TypeB_drug1 | 312 |
|
|
332
|
+
| Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
|
|
333
|
+
|---------|--------|--------|--------|---------|---------|
|
|
334
|
+
| TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
|
|
335
|
+
| TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
|
|
336
|
+
| TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
|
|
302
337
|
|
|
303
338
|
If only `perturbation` column is provided, evaluation is done per-perturbation.
|
|
304
339
|
|
|
@@ -216,6 +216,8 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
|
|
|
216
216
|
#### Key Features
|
|
217
217
|
|
|
218
218
|
- **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
|
|
219
|
+
- **DEG vs all-genes comparison**: Compute metrics on both and compare
|
|
220
|
+
- **Flexible DEG selection**: Top N by significance, or threshold-based filtering
|
|
219
221
|
- **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
|
|
220
222
|
- **GPU acceleration**: DEG detection and metrics on GPU for large datasets
|
|
221
223
|
- **Comprehensive reporting**: Aggregated and expanded results with visualizations
|
|
@@ -226,7 +228,7 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
|
|
|
226
228
|
from geneval import evaluate_degs
|
|
227
229
|
import pandas as pd
|
|
228
230
|
|
|
229
|
-
# Evaluate with DEG-focused metrics
|
|
231
|
+
# Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
|
|
230
232
|
results = evaluate_degs(
|
|
231
233
|
real_data=real_adata.X, # (n_samples, n_genes)
|
|
232
234
|
generated_data=gen_adata.X,
|
|
@@ -236,29 +238,62 @@ results = evaluate_degs(
|
|
|
236
238
|
control_key="control", # Value indicating control samples
|
|
237
239
|
perturbation_column="perturbation",
|
|
238
240
|
deg_method="welch", # or "student", "wilcoxon", "logfc"
|
|
239
|
-
pval_threshold=0.05,
|
|
241
|
+
pval_threshold=0.05, # Significance threshold
|
|
240
242
|
lfc_threshold=0.5, # log2 fold change threshold
|
|
243
|
+
compute_all_genes=True, # Also compute metrics on all genes
|
|
241
244
|
device="cuda", # GPU acceleration
|
|
242
245
|
)
|
|
243
246
|
|
|
244
|
-
#
|
|
245
|
-
print(results.
|
|
246
|
-
|
|
247
|
+
# Compare DEG-only vs all-genes metrics
|
|
248
|
+
print(results.comparison_summary)
|
|
249
|
+
# metric deg_mean all_mean difference ratio
|
|
250
|
+
# wasserstein_1 5.34 0.69 4.65 7.74
|
|
251
|
+
# mmd 1.14 0.13 1.02 9.00
|
|
252
|
+
|
|
253
|
+
# Access per-context results
|
|
254
|
+
print(results.expanded_metrics) # Has deg_* and all_* columns
|
|
247
255
|
print(results.deg_summary) # DEG counts per context
|
|
248
256
|
|
|
249
257
|
# Save results with plots
|
|
250
258
|
results.save("deg_evaluation/")
|
|
251
259
|
```
|
|
252
260
|
|
|
261
|
+
#### DEG Selection Control
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
# Option 1: Top N most significant DEGs
|
|
265
|
+
results = evaluate_degs(
|
|
266
|
+
...,
|
|
267
|
+
n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Option 2: Stricter thresholds
|
|
271
|
+
results = evaluate_degs(
|
|
272
|
+
...,
|
|
273
|
+
pval_threshold=0.01, # More stringent p-value
|
|
274
|
+
lfc_threshold=1.0, # 2-fold change minimum
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
# Option 3: DEGs only (skip all-genes metrics for speed)
|
|
278
|
+
results = evaluate_degs(
|
|
279
|
+
...,
|
|
280
|
+
compute_all_genes=False,
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Get DEG-only or all-genes metrics separately
|
|
284
|
+
deg_only = results.get_deg_only_metrics()
|
|
285
|
+
all_genes = results.get_all_genes_metrics()
|
|
286
|
+
```
|
|
287
|
+
|
|
253
288
|
#### Per-Context Evaluation
|
|
254
289
|
|
|
255
290
|
When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
|
|
256
291
|
|
|
257
|
-
| Context | n_DEGs |
|
|
258
|
-
|---------|--------|
|
259
|
-
| TypeA_drug1 | 234 |
|
|
260
|
-
| TypeA_drug2 | 189 |
|
|
261
|
-
| TypeB_drug1 | 312 |
|
|
292
|
+
| Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
|
|
293
|
+
|---------|--------|--------|--------|---------|---------|
|
|
294
|
+
| TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
|
|
295
|
+
| TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
|
|
296
|
+
| TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
|
|
262
297
|
|
|
263
298
|
If only `perturbation` column is provided, evaluation is done per-perturbation.
|
|
264
299
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "gengeneeval"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.4.1"
|
|
4
4
|
description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations."
|
|
5
5
|
authors = ["GenEval Team <geneval@example.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -31,6 +31,8 @@ from .context import (
|
|
|
31
31
|
from .evaluator import (
|
|
32
32
|
DEGEvaluator,
|
|
33
33
|
DEGEvaluationResult,
|
|
34
|
+
DEGSettings,
|
|
35
|
+
ContextMetrics,
|
|
34
36
|
evaluate_degs,
|
|
35
37
|
)
|
|
36
38
|
from .visualization import (
|
|
@@ -56,6 +58,8 @@ __all__ = [
|
|
|
56
58
|
# Evaluator
|
|
57
59
|
"DEGEvaluator",
|
|
58
60
|
"DEGEvaluationResult",
|
|
61
|
+
"DEGSettings",
|
|
62
|
+
"ContextMetrics",
|
|
59
63
|
"evaluate_degs",
|
|
60
64
|
# Visualization
|
|
61
65
|
"plot_deg_distributions",
|