rosetta-bioc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. rosetta_bioc-0.1.0/PKG-INFO +144 -0
  2. rosetta_bioc-0.1.0/README.md +116 -0
  3. rosetta_bioc-0.1.0/pyproject.toml +42 -0
  4. rosetta_bioc-0.1.0/rosetta/__init__.py +23 -0
  5. rosetta_bioc-0.1.0/rosetta/__main__.py +3 -0
  6. rosetta_bioc-0.1.0/rosetta/_bridge.py +64 -0
  7. rosetta_bioc-0.1.0/rosetta/_deps.py +19 -0
  8. rosetta_bioc-0.1.0/rosetta/_errors.py +24 -0
  9. rosetta_bioc-0.1.0/rosetta/example.py +65 -0
  10. rosetta_bioc-0.1.0/rosetta/pipelines.py +160 -0
  11. rosetta_bioc-0.1.0/rosetta/results.py +119 -0
  12. rosetta_bioc-0.1.0/rosetta/stats/__init__.py +0 -0
  13. rosetta_bioc-0.1.0/rosetta/stats/decide.py +32 -0
  14. rosetta_bioc-0.1.0/rosetta/stats/design.py +28 -0
  15. rosetta_bioc-0.1.0/rosetta/stats/treat.py +13 -0
  16. rosetta_bioc-0.1.0/rosetta/wrappers/__init__.py +1 -0
  17. rosetta_bioc-0.1.0/rosetta/wrappers/clusterprofiler.py +103 -0
  18. rosetta_bioc-0.1.0/rosetta/wrappers/deseq2.py +176 -0
  19. rosetta_bioc-0.1.0/rosetta/wrappers/edger.py +76 -0
  20. rosetta_bioc-0.1.0/rosetta/wrappers/limma.py +81 -0
  21. rosetta_bioc-0.1.0/rosetta/wrappers/phyloseq.py +85 -0
  22. rosetta_bioc-0.1.0/rosetta/wrappers/seurat.py +78 -0
  23. rosetta_bioc-0.1.0/rosetta_bioc.egg-info/PKG-INFO +144 -0
  24. rosetta_bioc-0.1.0/rosetta_bioc.egg-info/SOURCES.txt +34 -0
  25. rosetta_bioc-0.1.0/rosetta_bioc.egg-info/dependency_links.txt +1 -0
  26. rosetta_bioc-0.1.0/rosetta_bioc.egg-info/requires.txt +10 -0
  27. rosetta_bioc-0.1.0/rosetta_bioc.egg-info/top_level.txt +1 -0
  28. rosetta_bioc-0.1.0/setup.cfg +4 -0
  29. rosetta_bioc-0.1.0/tests/test_bridge.py +104 -0
  30. rosetta_bioc-0.1.0/tests/test_clusterprofiler.py +92 -0
  31. rosetta_bioc-0.1.0/tests/test_deps.py +26 -0
  32. rosetta_bioc-0.1.0/tests/test_errors.py +25 -0
  33. rosetta_bioc-0.1.0/tests/test_init.py +67 -0
  34. rosetta_bioc-0.1.0/tests/test_phyloseq.py +57 -0
  35. rosetta_bioc-0.1.0/tests/test_report.py +82 -0
  36. rosetta_bioc-0.1.0/tests/test_seurat.py +79 -0
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.4
2
+ Name: rosetta-bioc
3
+ Version: 0.1.0
4
+ Summary: Pythonic API for R/Bioconductor statistical methods — calls validated R code, returns pandas DataFrames.
5
+ Author: Catherine Chi Chung
6
+ Author-email: John Muirhead-Gould <john@nodes.bio>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/rosetta-bioc/rosetta
9
+ Project-URL: Documentation, https://github.com/rosetta-bioc/rosetta#readme
10
+ Project-URL: Repository, https://github.com/rosetta-bioc/rosetta
11
+ Project-URL: Issues, https://github.com/rosetta-bioc/rosetta/issues
12
+ Keywords: bioinformatics,R,bioconductor,DESeq2,edgeR,limma,rpy2,RNA-seq,differential-expression,pathway-enrichment,clusterProfiler,genomics,pandas
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: rpy2>=3.5
21
+ Requires-Dist: pandas>=1.5
22
+ Requires-Dist: numpy>=1.23
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=7; extra == "dev"
25
+ Requires-Dist: ruff>=0.5; extra == "dev"
26
+ Provides-Extra: posit
27
+ Requires-Dist: rpy2>=3.5; extra == "posit"
28
+
29
+ # 🪨 rosetta
30
+
31
+ **Python interface to R/Bioconductor — pandas in, pandas out, `.report()` when you're done.**
32
+
33
+ [![PyPI](https://img.shields.io/pypi/v/rosetta-bioc)](https://pypi.org/project/rosetta-bioc/)
34
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
35
+ [![Tests](https://img.shields.io/badge/tests-170%2B%20passing-brightgreen)]()
36
+
37
+ ```bash
38
+ pip install rosetta-bioc
39
+ ```
40
+
41
+ ## 30-second demo
42
+
43
+ ```python
44
+ import rosetta as rb
45
+
46
+ # DESeq2 differential expression — one call, pandas out
47
+ results = rb.deseq2(counts_df, metadata_df, design="~ condition")
48
+ results.report()
49
+ ```
50
+ ```
51
+ DESeq2 Results Summary
52
+ ──────────────────────────────
53
+ Total genes tested: 12,000
54
+ Significant (padj<0.05): 843 (7.0%)
55
+ ↑ Upregulated: 428
56
+ ↓ Downregulated: 415
57
+ LFC range: [-4.71, 3.50]
58
+ ```
59
+
60
+ That's it. No R code. No rpy2 boilerplate. No type conversion. Just results.
61
+
62
+ ## What it wraps
63
+
64
+ | R Package | Python | What it does |
65
+ |-----------|--------|--------------|
66
+ | DESeq2 | `rb.deseq2()` | Differential expression (negative binomial) |
67
+ | edgeR | `rb.edger()` | Quasi-likelihood differential expression |
68
+ | limma | `rb.limma_voom()` | Linear models + TREAT significance |
69
+ | clusterProfiler | `rb.enrich_go()` | GO/KEGG/Reactome pathway enrichment |
70
+ | phyloseq | `rb.phyloseq()` | Microbiome diversity analysis |
71
+ | Seurat | `rb.seurat()` | Single-cell RNA-seq |
72
+
73
+ All functions return a `RosettaDataFrame` (pandas DataFrame subclass) with a `.report()` method.
74
+
75
+ ## Modular DESeq2 API
76
+
77
+ For more control, use the step-by-step interface:
78
+
79
+ ```python
80
+ from rosetta.wrappers.deseq2 import run_deseq2, get_results, lfc_shrink
81
+
82
+ dds = run_deseq2(counts_df, metadata_df, design="~ condition")
83
+ res = get_results(dds, contrast=["condition", "treated", "control"], alpha=0.05)
84
+ shrunk = lfc_shrink(dds, coef="condition_treated_vs_control", type="apeglm")
85
+
86
+ res.report()
87
+ shrunk.report()
88
+ ```
89
+
90
+ ## Enrichment analysis
91
+
92
+ ```python
93
+ import rosetta as rb
94
+
95
+ # Over-representation analysis
96
+ go_results = rb.enrich_go(gene_list, org_db="org.Hs.eg.db", ont="BP")
97
+ go_results.report()
98
+
99
+ # KEGG pathways
100
+ kegg = rb.enrich_kegg(gene_list, organism="hsa")
101
+ kegg.report()
102
+ ```
103
+
104
+ ## Setup
105
+
106
+ **Python side:**
107
+ ```bash
108
+ pip install rosetta-bioc
109
+ ```
110
+
111
+ **R side** (one-time):
112
+ ```bash
113
+ Rscript install.R
114
+ ```
115
+
116
+ Or manually:
117
+ ```r
118
+ BiocManager::install(c("DESeq2", "edgeR", "limma", "clusterProfiler"))
119
+ ```
120
+
121
+ **Posit Cloud:** See [docs/posit-cloud.md](docs/posit-cloud.md) for zero-config setup.
122
+
123
+ ## Requirements
124
+
125
+ - Python 3.9+
126
+ - R 4.0+ with Bioconductor
127
+ - rpy2 ≥ 3.5
128
+
129
+ ## Philosophy
130
+
131
+ 1. **Rosetta calls R — it doesn't reimplement it.** All statistics run in the original, validated R packages.
132
+ 2. **Pandas in, pandas out.** No R objects leak into your Python workflow.
133
+ 3. **Fail early, fail clearly.** Input validation happens in Python before crossing the R boundary.
134
+ 4. **`.report()` everything.** Results should be immediately interpretable without manual inspection.
135
+
136
+ ## Contributing
137
+
138
+ See [CONTRIBUTING.md](CONTRIBUTING.md). Good first issues are labeled — start with [Issue #1: `report()` enhancements](https://github.com/rosetta-bioc/rosetta/issues/1).
139
+
140
+ ## Acknowledgments
141
+
142
+ Built on [rpy2](https://rpy2.github.io/) and the extraordinary R/Bioconductor ecosystem. All credit for the statistical methods goes to the original R package authors.
143
+
144
+ GSoC 2026 · MIT License · [Nodes Bio](https://nodes.bio)
@@ -0,0 +1,116 @@
1
+ # 🪨 rosetta
2
+
3
+ **Python interface to R/Bioconductor — pandas in, pandas out, `.report()` when you're done.**
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/rosetta-bioc)](https://pypi.org/project/rosetta-bioc/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
7
+ ![Tests](https://img.shields.io/badge/tests-170%2B%20passing-brightgreen)
8
+
9
+ ```bash
10
+ pip install rosetta-bioc
11
+ ```
12
+
13
+ ## 30-second demo
14
+
15
+ ```python
16
+ import rosetta as rb
17
+
18
+ # DESeq2 differential expression — one call, pandas out
19
+ results = rb.deseq2(counts_df, metadata_df, design="~ condition")
20
+ results.report()
21
+ ```
22
+ ```
23
+ DESeq2 Results Summary
24
+ ──────────────────────────────
25
+ Total genes tested: 12,000
26
+ Significant (padj<0.05): 843 (7.0%)
27
+ ↑ Upregulated: 428
28
+ ↓ Downregulated: 415
29
+ LFC range: [-4.71, 3.50]
30
+ ```
31
+
32
+ That's it. No R code. No rpy2 boilerplate. No type conversion. Just results.
33
+
34
+ ## What it wraps
35
+
36
+ | R Package | Python | What it does |
37
+ |-----------|--------|--------------|
38
+ | DESeq2 | `rb.deseq2()` | Differential expression (negative binomial) |
39
+ | edgeR | `rb.edger()` | Quasi-likelihood differential expression |
40
+ | limma | `rb.limma_voom()` | Linear models + TREAT significance |
41
+ | clusterProfiler | `rb.enrich_go()` | GO/KEGG/Reactome pathway enrichment |
42
+ | phyloseq | `rb.phyloseq()` | Microbiome diversity analysis |
43
+ | Seurat | `rb.seurat()` | Single-cell RNA-seq |
44
+
45
+ All functions return a `RosettaDataFrame` (pandas DataFrame subclass) with a `.report()` method.
46
+
47
+ ## Modular DESeq2 API
48
+
49
+ For more control, use the step-by-step interface:
50
+
51
+ ```python
52
+ from rosetta.wrappers.deseq2 import run_deseq2, get_results, lfc_shrink
53
+
54
+ dds = run_deseq2(counts_df, metadata_df, design="~ condition")
55
+ res = get_results(dds, contrast=["condition", "treated", "control"], alpha=0.05)
56
+ shrunk = lfc_shrink(dds, coef="condition_treated_vs_control", type="apeglm")
57
+
58
+ res.report()
59
+ shrunk.report()
60
+ ```
61
+
62
+ ## Enrichment analysis
63
+
64
+ ```python
65
+ import rosetta as rb
66
+
67
+ # Over-representation analysis
68
+ go_results = rb.enrich_go(gene_list, org_db="org.Hs.eg.db", ont="BP")
69
+ go_results.report()
70
+
71
+ # KEGG pathways
72
+ kegg = rb.enrich_kegg(gene_list, organism="hsa")
73
+ kegg.report()
74
+ ```
75
+
76
+ ## Setup
77
+
78
+ **Python side:**
79
+ ```bash
80
+ pip install rosetta-bioc
81
+ ```
82
+
83
+ **R side** (one-time):
84
+ ```bash
85
+ Rscript install.R
86
+ ```
87
+
88
+ Or manually:
89
+ ```r
90
+ BiocManager::install(c("DESeq2", "edgeR", "limma", "clusterProfiler", "phyloseq", "Seurat"))
91
+ ```
92
+
93
+ **Posit Cloud:** See [docs/posit-cloud.md](docs/posit-cloud.md) for zero-config setup.
94
+
95
+ ## Requirements
96
+
97
+ - Python 3.9+
98
+ - R 4.0+ with Bioconductor
99
+ - rpy2 ≥ 3.5
100
+
101
+ ## Philosophy
102
+
103
+ 1. **Rosetta calls R — it doesn't reimplement it.** All statistics run in the original, validated R packages.
104
+ 2. **Pandas in, pandas out.** No R objects leak into your Python workflow.
105
+ 3. **Fail early, fail clearly.** Input validation happens in Python before crossing the R boundary.
106
+ 4. **`.report()` everything.** Results should be immediately interpretable without manual inspection.
107
+
108
+ ## Contributing
109
+
110
+ See [CONTRIBUTING.md](CONTRIBUTING.md). Good first issues are labeled — start with [Issue #1: `report()` enhancements](https://github.com/rosetta-bioc/rosetta/issues/1).
111
+
112
+ ## Acknowledgments
113
+
114
+ Built on [rpy2](https://rpy2.github.io/) and the extraordinary R/Bioconductor ecosystem. All credit for the statistical methods goes to the original R package authors.
115
+
116
+ GSoC 2026 · MIT License · [Nodes Bio](https://nodes.bio)
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "rosetta-bioc"
7
+ version = "0.1.0"
8
+ description = "Pythonic API for R/Bioconductor statistical methods — calls validated R code, returns pandas DataFrames."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ authors = [
13
+ {name = "John Muirhead-Gould", email = "john@nodes.bio"},
14
+ {name = "Catherine Chi Chung"},
15
+ ]
16
+ keywords = ["bioinformatics", "R", "bioconductor", "DESeq2", "edgeR", "limma", "rpy2", "RNA-seq", "differential-expression", "pathway-enrichment", "clusterProfiler", "genomics", "pandas"]
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Science/Research",
20
+ "Programming Language :: Python :: 3",
21
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
22
+ "Topic :: Scientific/Engineering :: Medical Science Apps.",
23
+ ]
24
+ dependencies = [
25
+ "rpy2>=3.5",
26
+ "pandas>=1.5",
27
+ "numpy>=1.23",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = ["pytest>=7", "ruff>=0.5"]
32
+ posit = ["rpy2>=3.5"]
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/rosetta-bioc/rosetta"
36
+ Documentation = "https://github.com/rosetta-bioc/rosetta#readme"
37
+ Repository = "https://github.com/rosetta-bioc/rosetta"
38
+ Issues = "https://github.com/rosetta-bioc/rosetta/issues"
39
+
40
+ [tool.setuptools.packages.find]
41
+ include = ["rosetta*"]
42
+ exclude = ["tests*", "examples*"]
@@ -0,0 +1,23 @@
1
+ """rosetta — Seamless Python wrappers for R bioinformatics packages."""
2
+
3
+ from ._errors import RDataError, RFormulaError, RPackageMissing
4
+ from .results import RosettaDataFrame
5
+ from .wrappers.deseq2 import deseq2
6
+ from .wrappers.edger import edger
7
+ from .wrappers.limma import limma_voom
8
+ from .wrappers.clusterprofiler import enrich_go, enrich_kegg, enrich_pathway, enrich_custom
9
+ from .wrappers.phyloseq import phyloseq, phyloseq_richness
10
+ from .wrappers.seurat import seurat
11
+ from . import pipelines
12
+
13
# ``enrichment`` is the older public name for ``enrich_go``; both names are
# kept so existing imports continue to resolve.
enrichment = enrich_go

# Names exported by ``from rosetta import *``.
__all__ = [
    # differential expression
    "deseq2",
    "edger",
    "limma_voom",
    # enrichment
    "enrichment",
    "enrich_go",
    "enrich_kegg",
    "enrich_pathway",
    "enrich_custom",
    # microbiome / single-cell
    "phyloseq",
    "phyloseq_richness",
    "seurat",
    # workflows and result type
    "pipelines",
    "RosettaDataFrame",
    # exceptions
    "RDataError",
    "RFormulaError",
    "RPackageMissing",
]
@@ -0,0 +1,3 @@
1
+ """python -m rosetta — run the quick-start demo."""
2
from .example import main

# Executed by ``python -m rosetta``: run the synthetic-data demo.
main()
@@ -0,0 +1,64 @@
1
+ """R session management and bidirectional type conversion."""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import rpy2.robjects as ro
6
+ from rpy2.robjects import numpy2ri, pandas2ri
7
+ from rpy2.robjects.conversion import Converter, localconverter
8
+ from rpy2.robjects.packages import importr
9
+
10
# Conversion rules shared by every helper in this module. The rule sets are
# added in this exact order: numpy, then pandas, then rpy2's defaults.
_converter = Converter("rosetta")
for _rule_set in (numpy2ri.converter, pandas2ri.converter, ro.default_converter):
    _converter += _rule_set
del _rule_set

# Cached handle to R's ``base`` package; populated on first use by _get_base().
_base = None
16
+
17
+
18
def _get_base():
    """Return the R ``base`` package handle, importing it on first call.

    The handle is stored in the module-level ``_base`` so ``importr`` runs at
    most once per process.
    """
    global _base
    if _base is not None:
        return _base
    _base = importr("base")
    return _base
24
+
25
+
26
def to_r_matrix(df: pd.DataFrame):
    """Convert a pandas DataFrame into an R matrix.

    Args:
        df: Frame to convert.

    Returns:
        The result of R's ``base::as.matrix`` applied to the converted frame.

    Raises:
        RDataError: If ``df`` is not a pandas DataFrame.
    """
    from ._errors import RDataError

    if isinstance(df, pd.DataFrame):
        with localconverter(_converter):
            as_matrix = _get_base().as_matrix
            return as_matrix(ro.conversion.get_conversion().py2rpy(df))
    raise RDataError("Expected pandas DataFrame")
33
+
34
+
35
def to_r_dataframe(df: pd.DataFrame):
    """Convert a pandas DataFrame into an R ``data.frame``.

    Args:
        df: Frame to convert.

    Returns:
        The R object produced by the active rpy2 conversion's ``py2rpy``.

    Raises:
        RDataError: If ``df`` is not a pandas DataFrame.
    """
    from ._errors import RDataError

    if isinstance(df, pd.DataFrame):
        with localconverter(_converter):
            py_to_r = ro.conversion.get_conversion().py2rpy
            return py_to_r(df)
    raise RDataError("Expected pandas DataFrame")
42
+
43
+
44
def to_pandas(r_obj) -> "pd.DataFrame":
    """Convert an R object to its Python counterpart.

    When the conversion yields a pandas DataFrame it is wrapped in
    ``RosettaDataFrame``; any other converted value is returned unchanged.
    """
    from .results import RosettaDataFrame

    with localconverter(_converter):
        converted = ro.conversion.get_conversion().rpy2py(r_obj)

    if not isinstance(converted, pd.DataFrame):
        return converted
    return RosettaDataFrame(converted)
52
+
53
+
54
def to_r_df(r_obj):
    """Coerce an R object to an R ``data.frame`` using ``base::as.data.frame``."""
    with localconverter(_converter):
        r_base = _get_base()
        coerced = r_base.as_data_frame(r_obj)
    return coerced
58
+
59
+
60
def r_nrow(r_obj):
    """Return the row count of an R object as a Python ``int``.

    ``base::nrow`` is called on ``r_obj`` and the first element of its result
    is converted with ``int``.
    """
    with localconverter(_converter):
        row_count = _get_base().nrow(r_obj)
    first = row_count[0]
    return int(first)
@@ -0,0 +1,19 @@
1
+ """R package detection and installation via BiocManager."""
2
+
3
+ from rpy2.robjects.conversion import localconverter
4
+
5
+ from ._bridge import _converter, _get_base
6
+ from ._errors import RPackageMissing
7
+
8
+
9
def is_installed(package: str) -> bool:
    """Report whether R can load the namespace of ``package``.

    Calls ``base::requireNamespace(package, quietly=TRUE)`` and returns the
    truthiness of the first element of its result.
    """
    with localconverter(_converter):
        outcome = _get_base().requireNamespace(package, quietly=True)
    loaded = outcome[0]
    return bool(loaded)
14
+
15
+
16
def ensure_installed(package: str) -> None:
    """Verify that the R package ``package`` is available.

    Raises:
        RPackageMissing: If ``is_installed(package)`` is false.
    """
    if is_installed(package):
        return
    raise RPackageMissing(package)
@@ -0,0 +1,24 @@
1
+ """Rosetta exception classes for R error translation."""
2
+
3
+
4
class RosettaError(Exception):
    """Root of the rosetta exception hierarchy.

    The other exception classes in this module derive from it, so catching
    ``RosettaError`` catches all of them.
    """
6
+
7
+
8
class RPackageMissing(RosettaError):
    """Raised when a required R package is not installed.

    The exception message includes a ``BiocManager::install`` command for the
    missing package.
    """

    def __init__(self, package: str):
        # Keep the package name so callers can inspect it programmatically.
        self.package = package
        install_hint = f"R -e 'BiocManager::install(\"{package}\")'"
        super().__init__(
            f"R package '{package}' is not installed. Install with: {install_hint}"
        )
14
+
15
+
16
class RFormulaError(RosettaError):
    """Signals that an R design formula is invalid."""
18
+
19
+
20
class RDataError(RosettaError):
    """Signals that input data cannot be handed to an R function."""
22
class RosettaSecurityError(RosettaError):
    """Signals a security-related problem."""
24
+
@@ -0,0 +1,65 @@
1
+ """rosetta quick-start example — synthetic data, no files needed.
2
+
3
+ Run: python -m rosetta.example
4
+ """
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+
10
def _bh_adjust(pvalues):
    """Return Benjamini-Hochberg adjusted p-values in the order of ``pvalues``."""
    n_tests = len(pvalues)
    order = np.argsort(pvalues)
    # p * m / rank, where rank is the position of p among the sorted p-values.
    scaled = pvalues[order] * n_tests / np.arange(1, n_tests + 1)
    # Make the adjusted values non-decreasing in p by taking a running minimum
    # from the largest p-value downwards, then cap at 1.
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]
    adjusted = np.empty(n_tests, dtype=float)
    adjusted[order] = np.minimum(monotone, 1.0)
    return adjusted


def main():
    """Print a quick demo of ``RosettaDataFrame.report()`` on synthetic data.

    Builds a fake 1000-gene x 6-sample count matrix, fabricates a
    DESeq2-shaped result table without calling R, and prints its report and
    the top up-regulated genes. A private random generator is used, so the
    caller's global NumPy random state is left untouched.
    """
    from .results import RosettaDataFrame

    print("🪨 rosetta — quick demo with synthetic data\n")

    # Local generator: reproducible output without reseeding np.random globally.
    rng = np.random.default_rng(42)
    n_genes, n_samples = 1000, 6
    n_de = 100  # number of genes given a simulated treatment effect
    gene_names = [f"Gene_{i:04d}" for i in range(n_genes)]
    sample_names = [f"S{i+1}" for i in range(n_samples)]

    # Baseline expression everywhere, plus extra counts for the first n_de
    # genes in the last three (treated) samples.
    base = rng.negative_binomial(n=5, p=0.01, size=(n_genes, n_samples))
    base[:n_de, 3:] += rng.negative_binomial(n=3, p=0.01, size=(n_de, 3))

    counts = pd.DataFrame(base, index=gene_names, columns=sample_names)
    metadata = pd.DataFrame(
        {"condition": ["control"] * 3 + ["treated"] * 3},
        index=sample_names,
    )

    print(f"Count matrix: {counts.shape[0]} genes × {counts.shape[1]} samples")
    print(f"Conditions: {metadata['condition'].value_counts().to_dict()}\n")

    # Simulate DESeq2-like results (without R, for demo purposes).
    pvals = rng.uniform(0, 1, n_genes)
    pvals[:n_de] = rng.uniform(0, 0.001, n_de)  # truly DE genes
    lfc = rng.normal(0, 0.5, n_genes)
    lfc[:n_de] = rng.normal(2.0, 0.8, n_de)
    lfc_se = np.abs(rng.normal(0.3, 0.1, n_genes))

    results = RosettaDataFrame({
        "baseMean": counts.mean(axis=1).values,
        "log2FoldChange": lfc,
        "lfcSE": lfc_se,
        # Keep the statistic consistent with the reported standard error.
        "stat": lfc / lfc_se,
        "pvalue": pvals,
        "padj": _bh_adjust(pvals),
    }, index=gene_names)

    print("─" * 40)
    results.report()
    print("─" * 40)

    # Show top genes
    sig = results[results["padj"] < 0.05].sort_values("log2FoldChange", ascending=False)
    print("\nTop 5 upregulated genes:")
    print(sig[["log2FoldChange", "padj"]].head().to_string())

    print("\n✓ To run with real R packages:")
    print(" results = rb.deseq2(counts, metadata, design='~ condition')")
    print(" results.report()")
62
+
63
+
64
+ if __name__ == "__main__":
65
+ main()
@@ -0,0 +1,160 @@
1
+ """rosetta.pipelines — Complete analysis workflows in one call.
2
+
3
+ These are the "I just want results" functions. Each one runs the full
4
+ statistical pipeline and returns a RosettaDataFrame with .report().
5
+ """
6
+
7
+ import pandas as pd
8
+ from .results import RosettaDataFrame
9
+
10
+
11
+ def diff_expr(
12
+ counts: pd.DataFrame,
13
+ metadata: pd.DataFrame,
14
+ design: str = "~ condition",
15
+ method: str = "deseq2",
16
+ alpha: float = 0.05,
17
+ lfc_threshold: float = 0.0,
18
+ shrinkage: str | None = None,
19
+ contrast: list | None = None,
20
+ ) -> RosettaDataFrame:
21
+ """Run differential expression — full pipeline, one call.
22
+
23
+ Args:
24
+ counts: Gene count matrix (genes × samples), raw integers.
25
+ metadata: Sample metadata with index matching count columns.
26
+ design: R formula string (e.g. "~ condition", "~ batch + treatment").
27
+ method: One of "deseq2", "edger", "limma".
28
+ alpha: FDR significance threshold.
29
+ lfc_threshold: Minimum absolute log2 fold change.
30
+ shrinkage: For DESeq2: "apeglm", "ashr", or "normal". None = no shrinkage.
31
+ contrast: For DESeq2: [factor, numerator, denominator].
32
+
33
+ Returns:
34
+ RosettaDataFrame with .report() method.
35
+
36
+ Example:
37
+ >>> results = rb.pipelines.diff_expr(counts, meta, method="deseq2")
38
+ >>> results.report()
39
+ >>> sig_genes = results[results["padj"] < 0.05]
40
+ """
41
+ if method == "deseq2":
42
+ from .wrappers.deseq2 import run_deseq2, get_results, lfc_shrink
43
+
44
+ dds = run_deseq2(counts, metadata, design)
45
+
46
+ if shrinkage:
47
+ # Need coefficient name for shrinkage
48
+ from rpy2.robjects.packages import importr
49
+ from ._bridge import _converter
50
+ from rpy2.robjects.conversion import localconverter
51
+ deseq2_pkg = importr("DESeq2")
52
+ with localconverter(_converter):
53
+ coefs = list(deseq2_pkg.resultsNames(dds))
54
+ # Use last coefficient (typically the treatment effect)
55
+ coef = coefs[-1] if coefs else None
56
+ if coef:
57
+ return lfc_shrink(dds, coef=coef, type=shrinkage)
58
+
59
+ return get_results(dds, contrast=contrast, lfc_threshold=lfc_threshold, alpha=alpha)
60
+
61
+ elif method == "edger":
62
+ from .wrappers.edger import edger
63
+ return edger(counts, metadata, design, lfc=lfc_threshold)
64
+
65
+ elif method == "limma":
66
+ from .wrappers.limma import limma_voom
67
+ return limma_voom(counts, metadata, design)
68
+
69
+ else:
70
+ raise ValueError(f"Unknown method '{method}'. Use 'deseq2', 'edger', or 'limma'.")
71
+
72
+
73
def enrichment(
    gene_list: list[str],
    method: str = "go",
    organism: str = "hsa",
    org_db: str = "org.Hs.eg.db",
    ont: str = "BP",
    **kwargs,
) -> RosettaDataFrame:
    """Dispatch a pathway-enrichment request to the matching wrapper.

    Args:
        gene_list: List of gene IDs (Entrez by default).
        method: One of "go", "kegg", "reactome".
        organism: KEGG organism code (default "hsa" for human); used only by
            the "kegg" branch.
        org_db: OrgDb for GO (default "org.Hs.eg.db"); passed to ``enrich_go``
            through its ``organism`` keyword.
        ont: GO ontology — "BP", "MF", or "CC"; used only by the "go" branch.
        **kwargs: Forwarded unchanged to the selected wrapper.

    Returns:
        RosettaDataFrame with .report() method.

    Raises:
        ValueError: If ``method`` is not one of the supported names.

    Example:
        >>> results = rb.pipelines.enrichment(sig_genes, method="kegg")
        >>> results.report()
    """
    # Reject unsupported methods first so no wrapper module is imported for them.
    if method not in ("go", "kegg", "reactome"):
        raise ValueError(f"Unknown method '{method}'. Use 'go', 'kegg', or 'reactome'.")

    if method == "kegg":
        from .wrappers.clusterprofiler import enrich_kegg
        return enrich_kegg(gene_list, organism=organism, **kwargs)

    if method == "reactome":
        from .wrappers.clusterprofiler import enrich_pathway
        return enrich_pathway(gene_list, **kwargs)

    from .wrappers.clusterprofiler import enrich_go
    return enrich_go(gene_list, organism=org_db, ont=ont, **kwargs)
108
+
109
+
110
+ def compare(
111
+ counts: pd.DataFrame,
112
+ metadata: pd.DataFrame,
113
+ design: str = "~ condition",
114
+ methods: list[str] | None = None,
115
+ alpha: float = 0.05,
116
+ ) -> RosettaDataFrame:
117
+ """Run multiple DE methods and return a comparison summary.
118
+
119
+ This is the "which genes do all methods agree on?" function.
120
+
121
+ Args:
122
+ counts: Gene count matrix (genes × samples).
123
+ metadata: Sample metadata.
124
+ design: R formula string.
125
+ methods: List of methods to compare. Default: ["deseq2", "edger", "limma"].
126
+ alpha: FDR significance threshold.
127
+
128
+ Returns:
129
+ RosettaDataFrame with columns for each method's significance call
130
+ and an 'n_methods' column showing agreement count.
131
+
132
+ Example:
133
+ >>> consensus = rb.pipelines.compare(counts, meta)
134
+ >>> robust_genes = consensus[consensus["n_methods"] == 3]
135
+ """
136
+ if methods is None:
137
+ methods = ["deseq2", "edger", "limma"]
138
+
139
+ results = {}
140
+ for method in methods:
141
+ try:
142
+ res = diff_expr(counts, metadata, design, method=method, alpha=alpha)
143
+ # Extract significance column
144
+ if "padj" in res.columns:
145
+ results[method] = res["padj"] < alpha
146
+ elif "FDR" in res.columns:
147
+ results[method] = res["FDR"] < alpha
148
+ elif "adj.P.Val" in res.columns:
149
+ results[method] = res["adj.P.Val"] < alpha
150
+ except Exception as e:
151
+ print(f" ⚠ {method} failed: {e}")
152
+ continue
153
+
154
+ if not results:
155
+ raise RuntimeError("All methods failed")
156
+
157
+ comparison = pd.DataFrame(results)
158
+ comparison["n_methods"] = comparison.sum(axis=1)
159
+ comparison = comparison.sort_values("n_methods", ascending=False)
160
+ return RosettaDataFrame(comparison)