gengeneeval 0.2.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/PKG-INFO +164 -3
  2. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/README.md +160 -0
  3. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/pyproject.toml +4 -3
  4. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/__init__.py +56 -1
  5. gengeneeval-0.4.0/src/geneval/deg/__init__.py +65 -0
  6. gengeneeval-0.4.0/src/geneval/deg/context.py +271 -0
  7. gengeneeval-0.4.0/src/geneval/deg/detection.py +578 -0
  8. gengeneeval-0.4.0/src/geneval/deg/evaluator.py +538 -0
  9. gengeneeval-0.4.0/src/geneval/deg/visualization.py +376 -0
  10. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/evaluator.py +46 -0
  11. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/__init__.py +25 -0
  12. gengeneeval-0.4.0/src/geneval/metrics/accelerated.py +857 -0
  13. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/LICENSE +0 -0
  14. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/cli.py +0 -0
  15. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/config.py +0 -0
  16. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/core.py +0 -0
  17. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/data/__init__.py +0 -0
  18. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/data/gene_expression_datamodule.py +0 -0
  19. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/data/lazy_loader.py +0 -0
  20. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/data/loader.py +0 -0
  21. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/evaluators/__init__.py +0 -0
  22. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/evaluators/base_evaluator.py +0 -0
  23. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/evaluators/gene_expression_evaluator.py +0 -0
  24. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/lazy_evaluator.py +0 -0
  25. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/base_metric.py +0 -0
  26. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/correlation.py +0 -0
  27. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/distances.py +0 -0
  28. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/metrics.py +0 -0
  29. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/metrics/reconstruction.py +0 -0
  30. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/models/__init__.py +0 -0
  31. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/models/base_model.py +0 -0
  32. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/results.py +0 -0
  33. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/testing.py +0 -0
  34. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/utils/__init__.py +0 -0
  35. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/utils/io.py +0 -0
  36. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/utils/preprocessing.py +0 -0
  37. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/visualization/__init__.py +0 -0
  38. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/visualization/plots.py +0 -0
  39. {gengeneeval-0.2.1 → gengeneeval-0.4.0}/src/geneval/visualization/visualizer.py +0 -0
@@ -1,10 +1,10 @@
  Metadata-Version: 2.4
  Name: gengeneeval
- Version: 0.2.1
- Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, and publication-quality visualizations.
+ Version: 0.4.0
+ Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
  License: MIT
  License-File: LICENSE
- Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient
+ Keywords: gene expression,evaluation,metrics,single-cell,generative models,benchmarking,memory-efficient,DEG,perturbation
  Author: GenEval Team
  Author-email: geneval@example.com
  Requires-Python: >=3.8,<4.0
@@ -24,6 +24,7 @@ Provides-Extra: full
  Provides-Extra: gpu
  Requires-Dist: anndata (>=0.8.0)
  Requires-Dist: geomloss (>=0.2.1) ; extra == "full" or extra == "gpu"
+ Requires-Dist: joblib (>=1.0.0)
  Requires-Dist: matplotlib (>=3.5.0)
  Requires-Dist: numpy (>=1.21.0)
  Requires-Dist: pandas (>=1.3.0)
@@ -77,8 +78,12 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
  - ✅ Condition-based matching (perturbation, cell type, etc.)
  - ✅ Train/test split support
  - ✅ Per-gene and aggregate metrics
+ - ✅ **DEG-focused evaluation** with per-context (covariate × perturbation) support
+ - ✅ **Fast DEG detection** via vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
  - ✅ **Memory-efficient lazy loading** for large datasets
  - ✅ **Batched evaluation** to avoid OOM errors
+ - ✅ **CPU parallelization** via joblib (multi-core speedup)
+ - ✅ **GPU acceleration** via PyTorch (10-100x speedup)
  - ✅ Modular, extensible architecture
  - ✅ Command-line interface
  - ✅ Publication-quality visualizations
@@ -173,6 +178,162 @@ with load_data_lazy("real.h5ad", "gen.h5ad", ["perturbation"]) as loader:
      pass
  ```

+ ### Accelerated Evaluation (CPU Parallelization & GPU)
+
+ GenEval supports CPU parallelization and GPU acceleration for significant speedups:
+
+ ```python
+ from geneval import evaluate, get_available_backends
+
+ # Check available backends
+ print(get_available_backends())
+ # {'joblib': True, 'torch': True, 'geomloss': True, 'cuda': True, 'mps': False}
+
+ # Parallel CPU evaluation (use all cores)
+ results = evaluate(
+     real_path="real.h5ad",
+     generated_path="generated.h5ad",
+     condition_columns=["perturbation"],
+     n_jobs=-1,  # Use all available CPU cores
+ )
+
+ # GPU-accelerated evaluation
+ results = evaluate(
+     real_path="real.h5ad",
+     generated_path="generated.h5ad",
+     condition_columns=["perturbation"],
+     device="cuda",  # Use NVIDIA GPU
+ )
+
+ # Combined: parallel CPU + auto device selection
+ results = evaluate(..., n_jobs=8, device="auto")
+ ```
+
+ #### Low-level Accelerated API
+
+ For custom workflows, use the accelerated metrics directly:
+
+ ```python
+ from geneval.metrics.accelerated import (
+     compute_metrics_accelerated,
+     GPUWasserstein1,
+     GPUMMD,
+     vectorized_wasserstein1,
+ )
+ import numpy as np
+
+ # Load your data
+ real = np.random.randn(1000, 5000)  # 1000 cells, 5000 genes
+ generated = np.random.randn(1000, 5000)
+
+ # Compute multiple metrics with acceleration
+ results = compute_metrics_accelerated(
+     real, generated,
+     metrics=["wasserstein_1", "wasserstein_2", "mmd", "energy"],
+     n_jobs=8,  # CPU parallelization
+     device="cuda",  # GPU acceleration
+     verbose=True,
+ )
+
+ # Access results
+ print(f"W1: {results['wasserstein_1'].aggregate_value:.4f}")
+ print(f"MMD: {results['mmd'].aggregate_value:.4f}")
+ ```
+
+ #### Performance Tips
+
+ | Optimization | Speedup | When to Use |
+ |--------------|---------|-------------|
+ | `n_jobs=-1` (all cores) | 4-16x | Always (if joblib available) |
+ | `device="cuda"` | 10-100x | Large datasets, NVIDIA GPU available |
+ | `device="mps"` | 5-20x | Apple Silicon Macs |
+ | Vectorized NumPy | 2-5x | Automatic fallback |
+
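The per-gene design is what makes multi-core parallelization effective: each gene is an independent 1D distribution comparison. The sketch below is a hypothetical illustration of that pattern using joblib and SciPy directly, not GenEval's internal implementation; the helper name `per_gene_w1_parallel` is invented for this example.

```python
import numpy as np
from joblib import Parallel, delayed
from scipy.stats import wasserstein_distance

def per_gene_w1_parallel(real: np.ndarray, generated: np.ndarray, n_jobs: int = -1) -> np.ndarray:
    """Per-gene 1D Wasserstein-1 distances, computed across CPU cores."""
    n_genes = real.shape[1]
    # Each column (gene) is an independent comparison, so the work
    # parallelizes trivially across cores.
    distances = Parallel(n_jobs=n_jobs)(
        delayed(wasserstein_distance)(real[:, g], generated[:, g])
        for g in range(n_genes)
    )
    return np.asarray(distances)

real = np.random.randn(1000, 500)       # 1000 cells x 500 genes
generated = np.random.randn(1200, 500)  # sample counts may differ
w1_per_gene = per_gene_w1_parallel(real, generated, n_jobs=-1)
print(w1_per_gene.mean())               # aggregate by averaging over genes
```
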
+ ### DEG-Focused Evaluation
+
+ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, computing metrics only on biologically relevant DEGs rather than all genes. This provides more meaningful evaluation for perturbation prediction tasks.
+
+ #### Key Features
+
+ - **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
+ - **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
+ - **GPU acceleration**: DEG detection and metrics on GPU for large datasets
+ - **Comprehensive reporting**: Aggregated and expanded results with visualizations
+
+ #### Quick Start
+
+ ```python
+ from geneval import evaluate_degs
+ import pandas as pd
+
+ # Evaluate with DEG-focused metrics
+ results = evaluate_degs(
+     real_data=real_adata.X,  # (n_samples, n_genes)
+     generated_data=gen_adata.X,
+     real_obs=real_adata.obs,
+     generated_obs=gen_adata.obs,
+     condition_columns=["cell_type", "perturbation"],  # Context columns
+     control_key="control",  # Value indicating control samples
+     perturbation_column="perturbation",
+     deg_method="welch",  # or "student", "wilcoxon", "logfc"
+     pval_threshold=0.05,
+     lfc_threshold=0.5,  # log2 fold change threshold
+     device="cuda",  # GPU acceleration
+ )
+
+ # Access results
+ print(results.aggregated_metrics)  # Summary across all contexts
+ print(results.expanded_metrics)  # Per-context metrics
+ print(results.deg_summary)  # DEG counts per context
+
+ # Save results with plots
+ results.save("deg_evaluation/")
+ ```
+
+ #### Per-Context Evaluation
+
+ When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
+
+ | Context | n_DEGs | W1 (DEGs only) | MMD (DEGs only) |
+ |---------|--------|----------------|-----------------|
+ | TypeA_drug1 | 234 | 0.42 | 0.031 |
+ | TypeA_drug2 | 189 | 0.38 | 0.027 |
+ | TypeB_drug1 | 312 | 0.51 | 0.045 |
+
+ If only a `perturbation` column is provided, evaluation is done per-perturbation.
+
+ #### Available DEG Methods
+
+ | Method | Description | Speed |
+ |--------|-------------|-------|
+ | `welch` | Welch's t-test (unequal variance) | ⚡ Fast |
+ | `student` | Student's t-test (equal variance) | ⚡ Fast |
+ | `wilcoxon` | Wilcoxon rank-sum (non-parametric) | 🐢 Slower |
+ | `logfc` | Log fold change only (no p-value) | ⚡⚡ Fastest |
+
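For intuition on why the t-test-based methods are fast, the following is a minimal NumPy/SciPy sketch of a Welch's t-test vectorized over all genes at once, together with a log2 fold change. It illustrates the general technique, not necessarily how `compute_degs_fast` is implemented; the thresholds mirror the `pval_threshold` and `lfc_threshold` arguments above.

```python
import numpy as np
from scipy import stats

def welch_ttest_all_genes(control: np.ndarray, perturbed: np.ndarray):
    """Vectorized Welch's t-test for every gene (column) at once."""
    n1, n2 = control.shape[0], perturbed.shape[0]
    m1, m2 = control.mean(axis=0), perturbed.mean(axis=0)
    v1, v2 = control.var(axis=0, ddof=1), perturbed.var(axis=0, ddof=1)
    se2 = v1 / n1 + v2 / n2
    t = (m2 - m1) / np.sqrt(se2)
    # Welch-Satterthwaite degrees of freedom, computed per gene.
    df = se2**2 / ((v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1))
    pvals = 2.0 * stats.t.sf(np.abs(t), df)
    # Log2 fold change of mean expression (pseudocount avoids log of zero).
    lfc = np.log2(m2 + 1e-9) - np.log2(m1 + 1e-9)
    return t, pvals, lfc

control = np.abs(np.random.randn(200, 5000))
perturbed = np.abs(np.random.randn(300, 5000)) + 0.1
t, pvals, lfc = welch_ttest_all_genes(control, perturbed)
degs = (pvals < 0.05) & (np.abs(lfc) > 0.5)
print(int(degs.sum()), "candidate DEGs")
```
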
+ #### Visualization
+
+ ```python
+ from geneval.deg import (
+     plot_deg_distributions,
+     plot_context_heatmap,
+     plot_deg_counts,
+     create_deg_report,
+ )
+
+ # Distribution of metrics across contexts
+ plot_deg_distributions(results, save_path="dist.png")
+
+ # Heatmap: context × metric
+ plot_context_heatmap(results, save_path="heatmap.png")
+
+ # DEG counts per context (up/down regulated)
+ plot_deg_counts(results, save_path="deg_counts.png")
+
+ # Generate comprehensive report
+ create_deg_report(results, "report/", include_plots=True)
+ ```
+
  ## Expected Data Format

  GenEval expects AnnData (h5ad) files with:
@@ -38,8 +38,12 @@ All metrics are computed **per-gene** (returning a vector) and **aggregated**:
  - ✅ Condition-based matching (perturbation, cell type, etc.)
  - ✅ Train/test split support
  - ✅ Per-gene and aggregate metrics
+ - ✅ **DEG-focused evaluation** with per-context (covariate × perturbation) support
+ - ✅ **Fast DEG detection** via vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
  - ✅ **Memory-efficient lazy loading** for large datasets
  - ✅ **Batched evaluation** to avoid OOM errors
+ - ✅ **CPU parallelization** via joblib (multi-core speedup)
+ - ✅ **GPU acceleration** via PyTorch (10-100x speedup)
  - ✅ Modular, extensible architecture
  - ✅ Command-line interface
  - ✅ Publication-quality visualizations
@@ -134,6 +138,162 @@ with load_data_lazy("real.h5ad", "gen.h5ad", ["perturbation"]) as loader:
      pass
  ```

+ ### Accelerated Evaluation (CPU Parallelization & GPU)
+
+ GenEval supports CPU parallelization and GPU acceleration for significant speedups:
+
+ ```python
+ from geneval import evaluate, get_available_backends
+
+ # Check available backends
+ print(get_available_backends())
+ # {'joblib': True, 'torch': True, 'geomloss': True, 'cuda': True, 'mps': False}
+
+ # Parallel CPU evaluation (use all cores)
+ results = evaluate(
+     real_path="real.h5ad",
+     generated_path="generated.h5ad",
+     condition_columns=["perturbation"],
+     n_jobs=-1,  # Use all available CPU cores
+ )
+
+ # GPU-accelerated evaluation
+ results = evaluate(
+     real_path="real.h5ad",
+     generated_path="generated.h5ad",
+     condition_columns=["perturbation"],
+     device="cuda",  # Use NVIDIA GPU
+ )
+
+ # Combined: parallel CPU + auto device selection
+ results = evaluate(..., n_jobs=8, device="auto")
+ ```
+
+ #### Low-level Accelerated API
+
+ For custom workflows, use the accelerated metrics directly:
+
+ ```python
+ from geneval.metrics.accelerated import (
+     compute_metrics_accelerated,
+     GPUWasserstein1,
+     GPUMMD,
+     vectorized_wasserstein1,
+ )
+ import numpy as np
+
+ # Load your data
+ real = np.random.randn(1000, 5000)  # 1000 cells, 5000 genes
+ generated = np.random.randn(1000, 5000)
+
+ # Compute multiple metrics with acceleration
+ results = compute_metrics_accelerated(
+     real, generated,
+     metrics=["wasserstein_1", "wasserstein_2", "mmd", "energy"],
+     n_jobs=8,  # CPU parallelization
+     device="cuda",  # GPU acceleration
+     verbose=True,
+ )
+
+ # Access results
+ print(f"W1: {results['wasserstein_1'].aggregate_value:.4f}")
+ print(f"MMD: {results['mmd'].aggregate_value:.4f}")
+ ```
+
+ #### Performance Tips
+
+ | Optimization | Speedup | When to Use |
+ |--------------|---------|-------------|
+ | `n_jobs=-1` (all cores) | 4-16x | Always (if joblib available) |
+ | `device="cuda"` | 10-100x | Large datasets, NVIDIA GPU available |
+ | `device="mps"` | 5-20x | Apple Silicon Macs |
+ | Vectorized NumPy | 2-5x | Automatic fallback |
+
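For a sense of where the GPU speedup comes from, here is a minimal PyTorch sketch of a per-gene Wasserstein-1 distance computed as one batched sort, under the assumption that both matrices have the same number of cells. It illustrates the general idea rather than the exact `GPUWasserstein1` implementation; the helper name `w1_per_gene_gpu` is invented for this example.

```python
import torch

def w1_per_gene_gpu(real: torch.Tensor, generated: torch.Tensor) -> torch.Tensor:
    """Per-gene 1D Wasserstein-1 on the GPU.

    Assumes both matrices have the same number of rows (cells); for equal
    sample sizes the 1D W1 distance reduces to the mean absolute difference
    of the sorted samples, which is a single batched sort per gene.
    """
    real_sorted, _ = torch.sort(real, dim=0)
    gen_sorted, _ = torch.sort(generated, dim=0)
    return (real_sorted - gen_sorted).abs().mean(dim=0)  # shape: (n_genes,)

device = "cuda" if torch.cuda.is_available() else "cpu"
real = torch.randn(1000, 5000, device=device)
generated = torch.randn(1000, 5000, device=device)
print(w1_per_gene_gpu(real, generated).mean().item())  # aggregate over genes
```
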
+ ### DEG-Focused Evaluation
+
+ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, computing metrics only on biologically relevant DEGs rather than all genes. This provides more meaningful evaluation for perturbation prediction tasks.
+
+ #### Key Features
+
+ - **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
+ - **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
+ - **GPU acceleration**: DEG detection and metrics on GPU for large datasets
+ - **Comprehensive reporting**: Aggregated and expanded results with visualizations
+
+ #### Quick Start
+
+ ```python
+ from geneval import evaluate_degs
+ import pandas as pd
+
+ # Evaluate with DEG-focused metrics
+ results = evaluate_degs(
+     real_data=real_adata.X,  # (n_samples, n_genes)
+     generated_data=gen_adata.X,
+     real_obs=real_adata.obs,
+     generated_obs=gen_adata.obs,
+     condition_columns=["cell_type", "perturbation"],  # Context columns
+     control_key="control",  # Value indicating control samples
+     perturbation_column="perturbation",
+     deg_method="welch",  # or "student", "wilcoxon", "logfc"
+     pval_threshold=0.05,
+     lfc_threshold=0.5,  # log2 fold change threshold
+     device="cuda",  # GPU acceleration
+ )
+
+ # Access results
+ print(results.aggregated_metrics)  # Summary across all contexts
+ print(results.expanded_metrics)  # Per-context metrics
+ print(results.deg_summary)  # DEG counts per context
+
+ # Save results with plots
+ results.save("deg_evaluation/")
+ ```
+
+ #### Per-Context Evaluation
+
+ When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
+
+ | Context | n_DEGs | W1 (DEGs only) | MMD (DEGs only) |
+ |---------|--------|----------------|-----------------|
+ | TypeA_drug1 | 234 | 0.42 | 0.031 |
+ | TypeA_drug2 | 189 | 0.38 | 0.027 |
+ | TypeB_drug1 | 312 | 0.51 | 0.045 |
+
+ If only a `perturbation` column is provided, evaluation is done per-perturbation.
+
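As an illustration of how contexts can be enumerated from an `obs` table, the pandas sketch below builds the (cell_type × perturbation) combinations while excluding control samples. It is a simplified stand-in, not necessarily the behavior of `get_contexts`, and the example `obs` frame is invented.

```python
import pandas as pd

# Hypothetical obs table with one row per cell, as in an AnnData .obs frame.
obs = pd.DataFrame({
    "cell_type": ["TypeA", "TypeA", "TypeB", "TypeB"],
    "perturbation": ["drug1", "control", "drug1", "drug2"],
})

condition_columns = ["cell_type", "perturbation"]
control_key = "control"

# Every observed (cell_type, perturbation) combination, excluding controls,
# becomes one evaluation context such as "TypeA_drug1".
contexts = (
    obs.loc[obs["perturbation"] != control_key, condition_columns]
    .drop_duplicates()
    .apply(lambda row: "_".join(row.astype(str)), axis=1)
    .tolist()
)
print(contexts)  # ['TypeA_drug1', 'TypeB_drug1', 'TypeB_drug2']
```
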
+ #### Available DEG Methods
+
+ | Method | Description | Speed |
+ |--------|-------------|-------|
+ | `welch` | Welch's t-test (unequal variance) | ⚡ Fast |
+ | `student` | Student's t-test (equal variance) | ⚡ Fast |
+ | `wilcoxon` | Wilcoxon rank-sum (non-parametric) | 🐢 Slower |
+ | `logfc` | Log fold change only (no p-value) | ⚡⚡ Fastest |
+
+ #### Visualization
+
+ ```python
+ from geneval.deg import (
+     plot_deg_distributions,
+     plot_context_heatmap,
+     plot_deg_counts,
+     create_deg_report,
+ )
+
+ # Distribution of metrics across contexts
+ plot_deg_distributions(results, save_path="dist.png")
+
+ # Heatmap: context × metric
+ plot_context_heatmap(results, save_path="heatmap.png")
+
+ # DEG counts per context (up/down regulated)
+ plot_deg_counts(results, save_path="deg_counts.png")
+
+ # Generate comprehensive report
+ create_deg_report(results, "report/", include_plots=True)
+ ```
+
  ## Expected Data Format

  GenEval expects AnnData (h5ad) files with:
@@ -1,13 +1,13 @@
  [tool.poetry]
  name = "gengeneeval"
- version = "0.2.1"
- description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, train/test splits, memory-efficient lazy loading, and publication-quality visualizations."
+ version = "0.4.0"
+ description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations."
  authors = ["GenEval Team <geneval@example.com>"]
  license = "MIT"
  readme = "README.md"
  homepage = "https://github.com/AndreaRubbi/GenGeneEval"
  repository = "https://github.com/AndreaRubbi/GenGeneEval"
- keywords = ["gene expression", "evaluation", "metrics", "single-cell", "generative models", "benchmarking", "memory-efficient"]
+ keywords = ["gene expression", "evaluation", "metrics", "single-cell", "generative models", "benchmarking", "memory-efficient", "DEG", "perturbation"]
  classifiers = [
      "Development Status :: 4 - Beta",
      "Intended Audience :: Science/Research",
@@ -29,6 +29,7 @@ scipy = ">=1.7.0"
  torch = ">=1.9.0"
  matplotlib = ">=3.5.0"
  seaborn = ">=0.11.0"
+ joblib = ">=1.0.0"
  geomloss = {version = ">=0.2.1", optional = true}
  pykeops = {version = ">=1.4.0", optional = true}
  umap-learn = {version = ">=0.5.0", optional = true}
@@ -7,8 +7,10 @@ and generated gene expression datasets stored in AnnData (h5ad) format.
  Features:
  - Multiple distance and correlation metrics (per-gene and aggregate)
  - Condition-based matching (perturbation, cell type, etc.)
+ - DEG-focused evaluation with per-context (covariate × perturbation) support
  - Train/test split support
  - Memory-efficient lazy loading for large datasets
+ - CPU parallelization and GPU acceleration
  - Publication-quality visualizations
  - Command-line interface

@@ -21,6 +23,17 @@ Quick Start:
  ... output_dir="output/"
  ... )

+ DEG-Focused Evaluation:
+ >>> from geneval import evaluate_degs
+ >>> results = evaluate_degs(
+ ... real_data, generated_data,
+ ... real_obs, generated_obs,
+ ... condition_columns=["cell_type", "perturbation"],
+ ... control_key="control",
+ ... deg_method="welch",
+ ... device="cuda",  # GPU acceleration
+ ... )
+
  Memory-Efficient Mode (for large datasets):
  >>> from geneval import evaluate_lazy
  >>> results = evaluate_lazy(
@@ -36,7 +49,7 @@ CLI Usage:
  --conditions perturbation cell_type --output results/
  """

- __version__ = "0.2.1"
+ __version__ = "0.4.0"
  __author__ = "GenEval Team"

  # Main evaluation interface
@@ -101,6 +114,30 @@ from .metrics.reconstruction import (
      R2Score,
  )

+ # Accelerated computation
+ from .metrics.accelerated import (
+     AccelerationConfig,
+     ParallelMetricComputer,
+     get_available_backends,
+     compute_metrics_accelerated,
+ )
+
+ # DEG-focused evaluation
+ from .deg import (
+     DEGEvaluator,
+     DEGResult,
+     DEGEvaluationResult,
+     ContextEvaluator,
+     ContextResult,
+     compute_degs_fast,
+     compute_degs_gpu,
+     get_contexts,
+     plot_deg_distributions,
+     plot_context_heatmap,
+     create_deg_report,
+ )
+ from .deg.evaluator import evaluate_degs
+
  # Visualization
  from .visualization.visualizer import (
      EvaluationVisualizer,
@@ -161,6 +198,24 @@ __all__ = [
      "RMSEDistance",
      "MAEDistance",
      "R2Score",
+     # Acceleration
+     "AccelerationConfig",
+     "ParallelMetricComputer",
+     "get_available_backends",
+     "compute_metrics_accelerated",
+     # DEG evaluation
+     "DEGEvaluator",
+     "DEGResult",
+     "DEGEvaluationResult",
+     "ContextEvaluator",
+     "ContextResult",
+     "compute_degs_fast",
+     "compute_degs_gpu",
+     "evaluate_degs",
+     "get_contexts",
+     "plot_deg_distributions",
+     "plot_context_heatmap",
+     "create_deg_report",
      # Visualization
      "EvaluationVisualizer",
      "visualize",
@@ -0,0 +1,65 @@
+ """
+ Differentially Expressed Genes (DEG) module for GenGeneEval.
+
+ This module provides:
+ - Fast DEG detection using vectorized statistical tests
+ - Per-context evaluation (covariates × perturbations)
+ - DEG-focused metrics computation
+ - Integration with GPU acceleration
+
+ Example usage:
+     >>> from geneval.deg import DEGEvaluator, compute_degs_fast
+     >>> degs = compute_degs_fast(control_data, perturbed_data, method="welch")
+     >>> evaluator = DEGEvaluator(loader, deg_method="welch", pval_threshold=0.05)
+     >>> results = evaluator.evaluate()
+ """
+
+ from .detection import (
+     compute_degs_fast,
+     compute_degs_gpu,
+     compute_degs_auto,
+     DEGResult,
+     DEGMethod,
+ )
+ from .context import (
+     ContextEvaluator,
+     ContextResult,
+     get_contexts,
+     get_context_id,
+     filter_by_context,
+ )
+ from .evaluator import (
+     DEGEvaluator,
+     DEGEvaluationResult,
+     evaluate_degs,
+ )
+ from .visualization import (
+     plot_deg_distributions,
+     plot_context_heatmap,
+     plot_deg_counts,
+     create_deg_report,
+ )
+
+ __all__ = [
+     # Detection
+     "compute_degs_fast",
+     "compute_degs_gpu",
+     "compute_degs_auto",
+     "DEGResult",
+     "DEGMethod",
+     # Context
+     "ContextEvaluator",
+     "ContextResult",
+     "get_contexts",
+     "get_context_id",
+     "filter_by_context",
+     # Evaluator
+     "DEGEvaluator",
+     "DEGEvaluationResult",
+     "evaluate_degs",
+     # Visualization
+     "plot_deg_distributions",
+     "plot_context_heatmap",
+     "plot_deg_counts",
+     "create_deg_report",
+ ]