gengeneeval 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/PKG-INFO +46 -11
  2. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/README.md +45 -10
  3. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/pyproject.toml +1 -1
  4. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/__init__.py +1 -1
  5. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/__init__.py +4 -0
  6. gengeneeval-0.4.1/src/geneval/deg/evaluator.py +821 -0
  7. gengeneeval-0.4.0/src/geneval/deg/evaluator.py +0 -538
  8. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/LICENSE +0 -0
  9. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/cli.py +0 -0
  10. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/config.py +0 -0
  11. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/core.py +0 -0
  12. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/__init__.py +0 -0
  13. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/gene_expression_datamodule.py +0 -0
  14. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/lazy_loader.py +0 -0
  15. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/data/loader.py +0 -0
  16. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/context.py +0 -0
  17. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/detection.py +0 -0
  18. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/deg/visualization.py +0 -0
  19. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluator.py +0 -0
  20. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/__init__.py +0 -0
  21. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/base_evaluator.py +0 -0
  22. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/evaluators/gene_expression_evaluator.py +0 -0
  23. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/lazy_evaluator.py +0 -0
  24. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/__init__.py +0 -0
  25. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/accelerated.py +0 -0
  26. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/base_metric.py +0 -0
  27. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/correlation.py +0 -0
  28. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/distances.py +0 -0
  29. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/metrics.py +0 -0
  30. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/metrics/reconstruction.py +0 -0
  31. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/models/__init__.py +0 -0
  32. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/models/base_model.py +0 -0
  33. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/results.py +0 -0
  34. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/testing.py +0 -0
  35. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/__init__.py +0 -0
  36. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/io.py +0 -0
  37. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/utils/preprocessing.py +0 -0
  38. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/__init__.py +0 -0
  39. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/plots.py +0 -0
  40. {gengeneeval-0.4.0 → gengeneeval-0.4.1}/src/geneval/visualization/visualizer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gengeneeval
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -256,6 +256,8 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
256
256
  #### Key Features
257
257
 
258
258
  - **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
259
+ - **DEG vs all-genes comparison**: Compute metrics on both and compare
260
+ - **Flexible DEG selection**: Top N by significance, or threshold-based filtering
259
261
  - **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
260
262
  - **GPU acceleration**: DEG detection and metrics on GPU for large datasets
261
263
  - **Comprehensive reporting**: Aggregated and expanded results with visualizations
@@ -266,7 +268,7 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
266
268
  from geneval import evaluate_degs
267
269
  import pandas as pd
268
270
 
269
- # Evaluate with DEG-focused metrics
271
+ # Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
270
272
  results = evaluate_degs(
271
273
  real_data=real_adata.X, # (n_samples, n_genes)
272
274
  generated_data=gen_adata.X,
@@ -276,29 +278,62 @@ results = evaluate_degs(
276
278
  control_key="control", # Value indicating control samples
277
279
  perturbation_column="perturbation",
278
280
  deg_method="welch", # or "student", "wilcoxon", "logfc"
279
- pval_threshold=0.05,
281
+ pval_threshold=0.05, # Significance threshold
280
282
  lfc_threshold=0.5, # log2 fold change threshold
283
+ compute_all_genes=True, # Also compute metrics on all genes
281
284
  device="cuda", # GPU acceleration
282
285
  )
283
286
 
284
- # Access results
285
- print(results.aggregated_metrics) # Summary across all contexts
286
- print(results.expanded_metrics) # Per-context metrics
287
+ # Compare DEG-only vs all-genes metrics
288
+ print(results.comparison_summary)
289
+ # metric deg_mean all_mean difference ratio
290
+ # wasserstein_1 5.34 0.69 4.65 7.74
291
+ # mmd 1.14 0.13 1.02 9.00
292
+
293
+ # Access per-context results
294
+ print(results.expanded_metrics) # Has deg_* and all_* columns
287
295
  print(results.deg_summary) # DEG counts per context
288
296
 
289
297
  # Save results with plots
290
298
  results.save("deg_evaluation/")
291
299
  ```
292
300
 
301
+ #### DEG Selection Control
302
+
303
+ ```python
304
+ # Option 1: Top N most significant DEGs
305
+ results = evaluate_degs(
306
+ ...,
307
+ n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
308
+ )
309
+
310
+ # Option 2: Stricter thresholds
311
+ results = evaluate_degs(
312
+ ...,
313
+ pval_threshold=0.01, # More stringent p-value
314
+ lfc_threshold=1.0, # 2-fold change minimum
315
+ )
316
+
317
+ # Option 3: DEGs only (skip all-genes metrics for speed)
318
+ results = evaluate_degs(
319
+ ...,
320
+ compute_all_genes=False,
321
+ )
322
+
323
+ # Get DEG-only or all-genes metrics separately
324
+ deg_only = results.get_deg_only_metrics()
325
+ all_genes = results.get_all_genes_metrics()
326
+ ```
327
+
293
328
  #### Per-Context Evaluation
294
329
 
295
330
  When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
296
331
 
297
- | Context | n_DEGs | W1 (DEGs only) | MMD (DEGs only) |
298
- |---------|--------|----------------|-----------------|
299
- | TypeA_drug1 | 234 | 0.42 | 0.031 |
300
- | TypeA_drug2 | 189 | 0.38 | 0.027 |
301
- | TypeB_drug1 | 312 | 0.51 | 0.045 |
332
+ | Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
333
+ |---------|--------|--------|--------|---------|---------|
334
+ | TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
335
+ | TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
336
+ | TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
302
337
 
303
338
  If only the `perturbation` column is provided, evaluation is done per perturbation.
304
339
 
@@ -216,6 +216,8 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
216
216
  #### Key Features
217
217
 
218
218
  - **Fast DEG detection**: Vectorized Welch's t-test, Student's t-test, or Wilcoxon rank-sum
219
+ - **DEG vs all-genes comparison**: Compute metrics on both and compare
220
+ - **Flexible DEG selection**: Top N by significance, or threshold-based filtering
219
221
  - **Per-context evaluation**: Automatically evaluates each (covariate × perturbation) combination
220
222
  - **GPU acceleration**: DEG detection and metrics on GPU for large datasets
221
223
  - **Comprehensive reporting**: Aggregated and expanded results with visualizations
@@ -226,7 +228,7 @@ GenEval supports **Differentially Expressed Genes (DEG)-focused evaluation**, co
226
228
  from geneval import evaluate_degs
227
229
  import pandas as pd
228
230
 
229
- # Evaluate with DEG-focused metrics
231
+ # Evaluate with DEG-focused metrics (computes both DEG and all-genes by default)
230
232
  results = evaluate_degs(
231
233
  real_data=real_adata.X, # (n_samples, n_genes)
232
234
  generated_data=gen_adata.X,
@@ -236,29 +238,62 @@ results = evaluate_degs(
236
238
  control_key="control", # Value indicating control samples
237
239
  perturbation_column="perturbation",
238
240
  deg_method="welch", # or "student", "wilcoxon", "logfc"
239
- pval_threshold=0.05,
241
+ pval_threshold=0.05, # Significance threshold
240
242
  lfc_threshold=0.5, # log2 fold change threshold
243
+ compute_all_genes=True, # Also compute metrics on all genes
241
244
  device="cuda", # GPU acceleration
242
245
  )
243
246
 
244
- # Access results
245
- print(results.aggregated_metrics) # Summary across all contexts
246
- print(results.expanded_metrics) # Per-context metrics
247
+ # Compare DEG-only vs all-genes metrics
248
+ print(results.comparison_summary)
249
+ # metric deg_mean all_mean difference ratio
250
+ # wasserstein_1 5.34 0.69 4.65 7.74
251
+ # mmd 1.14 0.13 1.02 9.00
252
+
253
+ # Access per-context results
254
+ print(results.expanded_metrics) # Has deg_* and all_* columns
247
255
  print(results.deg_summary) # DEG counts per context
248
256
 
249
257
  # Save results with plots
250
258
  results.save("deg_evaluation/")
251
259
  ```
252
260
 
261
+ #### DEG Selection Control
262
+
263
+ ```python
264
+ # Option 1: Top N most significant DEGs
265
+ results = evaluate_degs(
266
+ ...,
267
+ n_top_degs=50, # Use only top 50 DEGs by adjusted p-value
268
+ )
269
+
270
+ # Option 2: Stricter thresholds
271
+ results = evaluate_degs(
272
+ ...,
273
+ pval_threshold=0.01, # More stringent p-value
274
+ lfc_threshold=1.0, # 2-fold change minimum
275
+ )
276
+
277
+ # Option 3: DEGs only (skip all-genes metrics for speed)
278
+ results = evaluate_degs(
279
+ ...,
280
+ compute_all_genes=False,
281
+ )
282
+
283
+ # Get DEG-only or all-genes metrics separately
284
+ deg_only = results.get_deg_only_metrics()
285
+ all_genes = results.get_all_genes_metrics()
286
+ ```
287
+
253
288
  #### Per-Context Evaluation
254
289
 
255
290
  When multiple condition columns are provided (e.g., `["cell_type", "perturbation"]`), GenEval evaluates **every combination** separately:
256
291
 
257
- | Context | n_DEGs | W1 (DEGs only) | MMD (DEGs only) |
258
- |---------|--------|----------------|-----------------|
259
- | TypeA_drug1 | 234 | 0.42 | 0.031 |
260
- | TypeA_drug2 | 189 | 0.38 | 0.027 |
261
- | TypeB_drug1 | 312 | 0.51 | 0.045 |
292
+ | Context | n_DEGs | deg_W1 | all_W1 | deg_MMD | all_MMD |
293
+ |---------|--------|--------|--------|---------|---------|
294
+ | TypeA_drug1 | 234 | 5.42 | 0.69 | 1.03 | 0.13 |
295
+ | TypeA_drug2 | 189 | 4.38 | 0.71 | 0.92 | 0.12 |
296
+ | TypeB_drug1 | 312 | 6.51 | 0.68 | 1.21 | 0.14 |
262
297
 
263
298
  If only the `perturbation` column is provided, evaluation is done per perturbation.
264
299
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "gengeneeval"
3
- version = "0.4.0"
3
+ version = "0.4.1"
4
4
  description = "Comprehensive evaluation of generated gene expression data. Computes metrics between real and generated datasets with support for condition matching, DEG-focused evaluation, per-context analysis, train/test splits, memory-efficient lazy loading, CPU parallelization, GPU acceleration, and publication-quality visualizations."
5
5
  authors = ["GenEval Team <geneval@example.com>"]
6
6
  license = "MIT"
@@ -49,7 +49,7 @@ CLI Usage:
49
49
  --conditions perturbation cell_type --output results/
50
50
  """
51
51
 
52
- __version__ = "0.4.0"
52
+ __version__ = "0.4.1"
53
53
  __author__ = "GenEval Team"
54
54
 
55
55
  # Main evaluation interface
@@ -31,6 +31,8 @@ from .context import (
31
31
  from .evaluator import (
32
32
  DEGEvaluator,
33
33
  DEGEvaluationResult,
34
+ DEGSettings,
35
+ ContextMetrics,
34
36
  evaluate_degs,
35
37
  )
36
38
  from .visualization import (
@@ -56,6 +58,8 @@ __all__ = [
56
58
  # Evaluator
57
59
  "DEGEvaluator",
58
60
  "DEGEvaluationResult",
61
+ "DEGSettings",
62
+ "ContextMetrics",
59
63
  "evaluate_degs",
60
64
  # Visualization
61
65
  "plot_deg_distributions",