rpy-bridge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rpy_bridge-0.1.0/LICENSE +25 -0
- rpy_bridge-0.1.0/PKG-INFO +205 -0
- rpy_bridge-0.1.0/README.md +156 -0
- rpy_bridge-0.1.0/README.rst +13 -0
- rpy_bridge-0.1.0/pyproject.toml +42 -0
- rpy_bridge-0.1.0/setup.cfg +4 -0
- rpy_bridge-0.1.0/src/rpy_bridge/__init__.py +36 -0
- rpy_bridge-0.1.0/src/rpy_bridge/py.typed +0 -0
- rpy_bridge-0.1.0/src/rpy_bridge/rpy2_utils.py +569 -0
- rpy_bridge-0.1.0/src/rpy_bridge.egg-info/PKG-INFO +205 -0
- rpy_bridge-0.1.0/src/rpy_bridge.egg-info/SOURCES.txt +13 -0
- rpy_bridge-0.1.0/src/rpy_bridge.egg-info/dependency_links.txt +1 -0
- rpy_bridge-0.1.0/src/rpy_bridge.egg-info/requires.txt +5 -0
- rpy_bridge-0.1.0/src/rpy_bridge.egg-info/top_level.txt +1 -0
- rpy_bridge-0.1.0/tests/test_wrapper.py +10 -0
rpy_bridge-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Victoria Cheung
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
Acknowledgement: This project builds on work originally developed at
|
|
24
|
+
Revolution Medicines and interfaces with the rpy2 project, which is licensed
|
|
25
|
+
under the GNU General Public License version 2 or later.
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rpy-bridge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bridge helpers for calling R from Python via rpy2
|
|
5
|
+
Author-email: Victoria Cheung <victoriakcheung@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Victoria Cheung
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Acknowledgement: This project builds on work originally developed at
|
|
29
|
+
Revolution Medicines and interfaces with the rpy2 project, which is licensed
|
|
30
|
+
under the GNU General Public License version 2 or later.
|
|
31
|
+
|
|
32
|
+
Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
|
|
33
|
+
Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
40
|
+
Requires-Python: >=3.11
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: numpy>=1.24
|
|
44
|
+
Requires-Dist: pandas>=2.0
|
|
45
|
+
Requires-Dist: rpy2>=3.5
|
|
46
|
+
Requires-Dist: loguru>=0.7
|
|
47
|
+
Requires-Dist: ipykernel>=7.1.0
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# rpy-bridge
|
|
51
|
+
|
|
52
|
+
Utilities for calling R code from Python using `rpy2`. It provides a small
|
|
53
|
+
wrapper that can (optionally) activate an `renv` project, source an R
|
|
54
|
+
script, call functions from that script, and post-process results into
|
|
55
|
+
well-typed pandas `DataFrame` objects.
|
|
56
|
+
|
|
57
|
+
This project was developed for bilingual teams where some functions are
|
|
58
|
+
authored in R and the primary consumer is a Python-centric developer. It
|
|
59
|
+
acts as an interoperability layer so a Python programmer can call and reuse
|
|
60
|
+
R functions (written and maintained by R authors) without reimplementing
|
|
61
|
+
that logic in Python.
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
Prerequisites
|
|
66
|
+
|
|
67
|
+
- System R installed and available on `PATH` (rpy2 requires a working R
|
|
68
|
+
installation).
|
|
69
|
+
- Python 3.11+
|
|
70
|
+
|
|
71
|
+
Installation
|
|
72
|
+
|
|
73
|
+
Install from PyPI or as an editable local package during development:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# From PyPI (recommended for consumers)
|
|
77
|
+
python3 -m pip install rpy-bridge
|
|
78
|
+
|
|
79
|
+
# During development (install editable from local source)
|
|
80
|
+
python3 -m pip install -e .
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Required Python packages (the installer will pull these in):
|
|
84
|
+
|
|
85
|
+
- `rpy2` (GPLv2 or later)
|
|
86
|
+
- `pandas`
|
|
87
|
+
- `numpy`
|
|
88
|
+
|
|
89
|
+
## Usage
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from pathlib import Path
|
|
93
|
+
|
|
94
|
+
from rpy_bridge import RFunctionCaller
|
|
95
|
+
|
|
96
|
+
caller = RFunctionCaller(
|
|
97
|
+
path_to_renv=Path("/path/to/project"),
|
|
98
|
+
script_path=Path("/path/to/script.R"),
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
summary_df = caller.call("summarize_cohort", cohort_df)
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
## Examples
|
|
105
|
+
|
|
106
|
+
Basic — run a local R script
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from pathlib import Path
|
|
110
|
+
from rpy_bridge import RFunctionCaller
|
|
111
|
+
|
|
112
|
+
# If your project uses renv, pass the project directory (parent of renv/)
|
|
113
|
+
project_dir = Path("/path/to/your-r-project")
|
|
114
|
+
script = project_dir / "scripts" / "example.R"
|
|
115
|
+
|
|
116
|
+
# If you do not use renv, pass None for path_to_renv
|
|
117
|
+
caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
|
|
118
|
+
result = caller.call("some_function", 42, named_arg="value")
|
|
119
|
+
print(type(result))
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Notes:
|
|
123
|
+
|
|
124
|
+
`path_to_renv` may be either the project directory (containing `renv/`) or
|
|
125
|
+
the `renv/` directory itself. When provided, `RFunctionCaller` will call
|
|
126
|
+
`renv::load()` so the R session uses the project's library versions. If
|
|
127
|
+
`path_to_renv` is `None`, `rpy-bridge` will use whatever R environment is
|
|
128
|
+
visible to the Python process (system R or an R environment you activated
|
|
129
|
+
before starting Python).
|
|
130
|
+
|
|
131
|
+
The intended workflow is:
|
|
132
|
+
|
|
133
|
+
- Clone or download the R script into your local filesystem (review the
|
|
134
|
+
code if it came from a remote source).
|
|
135
|
+
- Construct an `RFunctionCaller` with `script_path` pointing to the local
|
|
136
|
+
script and optionally `path_to_renv` to activate the project's R library.
|
|
137
|
+
|
|
138
|
+
This keeps network, token, and SSL concerns outside the package while
|
|
139
|
+
preserving an easy path for Python-first users to call R-written functions.
|
|
140
|
+
|
|
141
|
+
If you need to run an R script from a remote repository, clone or download
|
|
142
|
+
the script locally, review it, and then construct an `RFunctionCaller`
|
|
143
|
+
pointing at the local `script_path`. This keeps network, token, and SSL
|
|
144
|
+
concerns outside the package and avoids environment-specific failures.
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from pathlib import Path
from rpy_bridge import RFunctionCaller
|
|
148
|
+
|
|
149
|
+
project_dir = Path("/path/to/cloned/repo")
|
|
150
|
+
script = project_dir / "scripts" / "analysis.R"
|
|
151
|
+
|
|
152
|
+
caller = RFunctionCaller(path_to_renv=None, script_path=script)
|
|
153
|
+
result = caller.call("analyze", some_arg=42)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## R Setup
|
|
157
|
+
|
|
158
|
+
If you plan to execute R code with `rpy-bridge`, use the helper scripts in
|
|
159
|
+
`examples/r-deps/` to prepare an R environment.
|
|
160
|
+
|
|
161
|
+
- On macOS (Homebrew) install system deps:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
bash examples/r-deps/install_r_dev_deps_homebrew.sh
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
- Initialize a project `renv` (run in an R session):
|
|
168
|
+
|
|
169
|
+
```r
|
|
170
|
+
source("examples/r-deps/setup_env.R")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
- Restore the environment on a new machine:
|
|
174
|
+
|
|
175
|
+
```r
|
|
176
|
+
renv::restore()
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Review the scripts in `examples/r-deps/` before running; they install system
|
|
180
|
+
libraries and R packages and should be run from a trusted environment. For
|
|
181
|
+
CI, use `r-lib/actions/setup-r` to install R, then run the `setup_env.R` script shown
|
|
182
|
+
above to prepare the `renv` environment.
|
|
183
|
+
|
|
184
|
+
## Collaboration note
|
|
185
|
+
|
|
186
|
+
This repository provides example R setup scripts for teams working across
|
|
187
|
+
Python and R. Each project may require different R packages — check the
|
|
188
|
+
package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for
|
|
189
|
+
project-specific reproducibility.
|
|
190
|
+
|
|
191
|
+
Clone repositories containing R scripts locally or use your
|
|
192
|
+
preferred tooling to obtain scripts before execution.
|
|
193
|
+
|
|
194
|
+
## Licensing
|
|
195
|
+
|
|
196
|
+
- `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
|
|
197
|
+
- The project depends on [`rpy2`](https://rpy2.github.io) which is licensed
|
|
198
|
+
under the GNU General Public License v2 (or later). Distributing binaries that
|
|
199
|
+
bundle `rpy2` must comply with the GPL terms. When you install `rpy-bridge`
|
|
200
|
+
as a dependency, `rpy2` is resolved directly from its upstream maintainers.
|
|
201
|
+
|
|
202
|
+
### Thanks
|
|
203
|
+
|
|
204
|
+
This package was spun out of internal tooling at Revolution Medicines.
|
|
205
|
+
Many thanks to the team there for allowing the code to be open sourced.
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# rpy-bridge
|
|
2
|
+
|
|
3
|
+
Utilities for calling R code from Python using `rpy2`. It provides a small
|
|
4
|
+
wrapper that can (optionally) activate an `renv` project, source an R
|
|
5
|
+
script, call functions from that script, and post-process results into
|
|
6
|
+
well-typed pandas `DataFrame` objects.
|
|
7
|
+
|
|
8
|
+
This project was developed for bilingual teams where some functions are
|
|
9
|
+
authored in R and the primary consumer is a Python-centric developer. It
|
|
10
|
+
acts as an interoperability layer so a Python programmer can call and reuse
|
|
11
|
+
R functions (written and maintained by R authors) without reimplementing
|
|
12
|
+
that logic in Python.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
Prerequisites
|
|
17
|
+
|
|
18
|
+
- System R installed and available on `PATH` (rpy2 requires a working R
|
|
19
|
+
installation).
|
|
20
|
+
- Python 3.11+
|
|
21
|
+
|
|
22
|
+
Installation
|
|
23
|
+
|
|
24
|
+
Install from PyPI or as an editable local package during development:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# From PyPI (recommended for consumers)
|
|
28
|
+
python3 -m pip install rpy-bridge
|
|
29
|
+
|
|
30
|
+
# During development (install editable from local source)
|
|
31
|
+
python3 -m pip install -e .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Required Python packages (the installer will pull these in):
|
|
35
|
+
|
|
36
|
+
- `rpy2` (GPLv2 or later)
|
|
37
|
+
- `pandas`
|
|
38
|
+
- `numpy`
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
|
|
45
|
+
from rpy_bridge import RFunctionCaller
|
|
46
|
+
|
|
47
|
+
caller = RFunctionCaller(
|
|
48
|
+
path_to_renv=Path("/path/to/project"),
|
|
49
|
+
script_path=Path("/path/to/script.R"),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
summary_df = caller.call("summarize_cohort", cohort_df)
```
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
## Examples
|
|
56
|
+
|
|
57
|
+
Basic — run a local R script
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from pathlib import Path
|
|
61
|
+
from rpy_bridge import RFunctionCaller
|
|
62
|
+
|
|
63
|
+
# If your project uses renv, pass the project directory (parent of renv/)
|
|
64
|
+
project_dir = Path("/path/to/your-r-project")
|
|
65
|
+
script = project_dir / "scripts" / "example.R"
|
|
66
|
+
|
|
67
|
+
# If you do not use renv, pass None for path_to_renv
|
|
68
|
+
caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
|
|
69
|
+
result = caller.call("some_function", 42, named_arg="value")
|
|
70
|
+
print(type(result))
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Notes:
|
|
74
|
+
|
|
75
|
+
`path_to_renv` may be either the project directory (containing `renv/`) or
|
|
76
|
+
the `renv/` directory itself. When provided, `RFunctionCaller` will call
|
|
77
|
+
`renv::load()` so the R session uses the project's library versions. If
|
|
78
|
+
`path_to_renv` is `None`, `rpy-bridge` will use whatever R environment is
|
|
79
|
+
visible to the Python process (system R or an R environment you activated
|
|
80
|
+
before starting Python).
|
|
81
|
+
|
|
82
|
+
The intended workflow is:
|
|
83
|
+
|
|
84
|
+
- Clone or download the R script into your local filesystem (review the
|
|
85
|
+
code if it came from a remote source).
|
|
86
|
+
- Construct an `RFunctionCaller` with `script_path` pointing to the local
|
|
87
|
+
script and optionally `path_to_renv` to activate the project's R library.
|
|
88
|
+
|
|
89
|
+
This keeps network, token, and SSL concerns outside the package while
|
|
90
|
+
preserving an easy path for Python-first users to call R-written functions.
|
|
91
|
+
|
|
92
|
+
If you need to run an R script from a remote repository, clone or download
|
|
93
|
+
the script locally, review it, and then construct an `RFunctionCaller`
|
|
94
|
+
pointing at the local `script_path`. This keeps network, token, and SSL
|
|
95
|
+
concerns outside the package and avoids environment-specific failures.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from pathlib import Path
from rpy_bridge import RFunctionCaller
|
|
99
|
+
|
|
100
|
+
project_dir = Path("/path/to/cloned/repo")
|
|
101
|
+
script = project_dir / "scripts" / "analysis.R"
|
|
102
|
+
|
|
103
|
+
caller = RFunctionCaller(path_to_renv=None, script_path=script)
|
|
104
|
+
result = caller.call("analyze", some_arg=42)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## R Setup
|
|
108
|
+
|
|
109
|
+
If you plan to execute R code with `rpy-bridge`, use the helper scripts in
|
|
110
|
+
`examples/r-deps/` to prepare an R environment.
|
|
111
|
+
|
|
112
|
+
- On macOS (Homebrew) install system deps:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
bash examples/r-deps/install_r_dev_deps_homebrew.sh
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
- Initialize a project `renv` (run in an R session):
|
|
119
|
+
|
|
120
|
+
```r
|
|
121
|
+
source("examples/r-deps/setup_env.R")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
- Restore the environment on a new machine:
|
|
125
|
+
|
|
126
|
+
```r
|
|
127
|
+
renv::restore()
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Review the scripts in `examples/r-deps/` before running; they install system
|
|
131
|
+
libraries and R packages and should be run from a trusted environment. For
|
|
132
|
+
CI, use `r-lib/actions/setup-r` to install R, then run the `setup_env.R` script shown
|
|
133
|
+
above to prepare the `renv` environment.
|
|
134
|
+
|
|
135
|
+
## Collaboration note
|
|
136
|
+
|
|
137
|
+
This repository provides example R setup scripts for teams working across
|
|
138
|
+
Python and R. Each project may require different R packages — check the
|
|
139
|
+
package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for
|
|
140
|
+
project-specific reproducibility.
|
|
141
|
+
|
|
142
|
+
Clone repositories containing R scripts locally or use your
|
|
143
|
+
preferred tooling to obtain scripts before execution.
|
|
144
|
+
|
|
145
|
+
## Licensing
|
|
146
|
+
|
|
147
|
+
- `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
|
|
148
|
+
- The project depends on [`rpy2`](https://rpy2.github.io) which is licensed
|
|
149
|
+
under the GNU General Public License v2 (or later). Distributing binaries that
|
|
150
|
+
bundle `rpy2` must comply with the GPL terms. When you install `rpy-bridge`
|
|
151
|
+
as a dependency, `rpy2` is resolved directly from its upstream maintainers.
|
|
152
|
+
|
|
153
|
+
### Thanks
|
|
154
|
+
|
|
155
|
+
This package was spun out of internal tooling at Revolution Medicines.
|
|
156
|
+
Many thanks to the team there for allowing the code to be open sourced.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
rpy-bridge
|
|
2
|
+
==========
|
|
3
|
+
|
|
4
|
+
Usage example (local script):
|
|
5
|
+
|
|
6
|
+
.. code-block:: python
|
|
7
|
+
|
|
8
|
+
from rpy_bridge.rpy2_utils import RFunctionCaller
|
|
9
|
+
|
|
10
|
+
# Use a local script path (clone or download remote scripts yourself)
|
|
11
|
+
script_path = "/path/to/cloned/repo/scripts/my_script.R"
|
|
12
|
+
caller = RFunctionCaller(path_to_renv=None, script_path=script_path)
|
|
13
|
+
result = caller.call("my_func")
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "rpy-bridge"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Bridge helpers for calling R from Python via rpy2"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { file = "LICENSE" }
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Victoria Cheung", email = "victoriakcheung@gmail.com" }
|
|
9
|
+
]
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"numpy>=1.24",
|
|
13
|
+
"pandas>=2.0",
|
|
14
|
+
"rpy2>=3.5",
|
|
15
|
+
"loguru>=0.7",
|
|
16
|
+
"ipykernel>=7.1.0",
|
|
17
|
+
]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/vic-cheung/rpy-bridge"
|
|
29
|
+
"Issue Tracker" = "https://github.com/vic-cheung/rpy-bridge/issues"
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["setuptools>=61"]
|
|
33
|
+
build-backend = "setuptools.build_meta"
|
|
34
|
+
|
|
35
|
+
[tool.uv]
|
|
36
|
+
dev-dependencies = [
|
|
37
|
+
"ruff>=0.6",
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
"build>=1.0",
|
|
40
|
+
"twine>=4.0",
|
|
41
|
+
"certifi>=2025.0",
|
|
42
|
+
]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Public API for the rpy-bridge package.
|
|
3
|
+
|
|
4
|
+
This module re-exports the helpers that wrap rpy2 so downstream users can
|
|
5
|
+
continue importing directly from ``rpy_bridge``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .rpy2_utils import (
|
|
9
|
+
RFunctionCaller,
|
|
10
|
+
activate_renv,
|
|
11
|
+
align_numeric_dtypes,
|
|
12
|
+
clean_r_dataframe,
|
|
13
|
+
compare_r_py_dataframes,
|
|
14
|
+
fix_r_dataframe_types,
|
|
15
|
+
fix_string_nans,
|
|
16
|
+
normalize_dtypes,
|
|
17
|
+
normalize_single_df_dtypes,
|
|
18
|
+
postprocess_r_dataframe,
|
|
19
|
+
replace_r_na,
|
|
20
|
+
r_namedlist_to_dict,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"activate_renv",
|
|
25
|
+
"RFunctionCaller",
|
|
26
|
+
"r_namedlist_to_dict",
|
|
27
|
+
"clean_r_dataframe",
|
|
28
|
+
"fix_string_nans",
|
|
29
|
+
"replace_r_na",
|
|
30
|
+
"normalize_single_df_dtypes",
|
|
31
|
+
"fix_r_dataframe_types",
|
|
32
|
+
"postprocess_r_dataframe",
|
|
33
|
+
"normalize_dtypes",
|
|
34
|
+
"align_numeric_dtypes",
|
|
35
|
+
"compare_r_py_dataframes",
|
|
36
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Wrapper for calling R functions from Python using rpy2.
|
|
3
|
+
|
|
4
|
+
----------
|
|
5
|
+
** R must be installed and accessible in your environment **
|
|
6
|
+
Ensure compatibility with your R project's renv setup (or other virtual env/base env if that's what you're using).
|
|
7
|
+
----------
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# ruff: noqa: E402
|
|
11
|
+
# %%
|
|
12
|
+
# Import libraries
|
|
13
|
+
import os
|
|
14
|
+
import warnings
|
|
15
|
+
|
|
16
|
+
warnings.filterwarnings("ignore", message="Environment variable .* redefined by R")
|
|
17
|
+
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import rpy2.robjects as ro
|
|
23
|
+
from rpy2 import robjects
|
|
24
|
+
from rpy2.rinterface_lib.sexp import NULLType
|
|
25
|
+
from rpy2.rlike.container import NamedList
|
|
26
|
+
from rpy2.robjects import pandas2ri
|
|
27
|
+
from rpy2.robjects.conversion import localconverter
|
|
28
|
+
from rpy2.robjects.vectors import (
|
|
29
|
+
BoolVector,
|
|
30
|
+
FloatVector,
|
|
31
|
+
IntVector,
|
|
32
|
+
ListVector,
|
|
33
|
+
StrVector,
|
|
34
|
+
)
|
|
35
|
+
from typing import Optional
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
from loguru import logger # type: ignore
|
|
39
|
+
except Exception:
|
|
40
|
+
import logging
|
|
41
|
+
|
|
42
|
+
logging.basicConfig()
|
|
43
|
+
logger = logging.getLogger("rpy-bridge")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# %%
|
|
47
|
+
def activate_renv(path_to_renv: Path) -> None:
    """
    Activate an renv project in the embedded R session via ``renv::load()``.

    Using ``renv::load()`` avoids sourcing ``activate.R`` directly and avoids
    accidentally initializing a new environment.

    Args:
        path_to_renv: Either the ``renv/`` directory itself
            (e.g. ``/path/to/renv``) or the project directory that contains
            the ``renv/`` folder.

    Raises:
        FileNotFoundError: If ``renv/activate.R`` or the project's
            ``renv.lock`` does not exist.
        RuntimeError: If ``renv::load()`` fails inside R.
    """
    path_to_renv = path_to_renv.resolve()

    # Determine if path_to_renv is the renv directory itself or its parent.
    if path_to_renv.name == "renv" and (path_to_renv / "activate.R").exists():
        renv_dir = path_to_renv
        renv_project_dir = path_to_renv.parent
    else:
        renv_dir = path_to_renv / "renv"
        renv_project_dir = path_to_renv

    renv_activate = renv_dir / "activate.R"
    renv_lock = renv_project_dir / "renv.lock"

    if not renv_activate.exists() or not renv_lock.exists():
        raise FileNotFoundError(
            f"[Error] renv environment not found or incomplete.\n"
            f" Expected activate.R at: {renv_activate}\n"
            f" Expected renv.lock at: {renv_lock}\n"
            f" Provided path: {path_to_renv}"
        )

    # Optional: point R at the project's .Renviron if one exists.
    renviron_file = renv_project_dir / ".Renviron"
    if renviron_file.is_file():
        os.environ["R_ENVIRON_USER"] = str(renviron_file)
        # Messages are pre-formatted so they render correctly with both loguru
        # and the stdlib-logging fallback (which does not understand "{}").
        logger.info(f"R_ENVIRON_USER set to: {renviron_file}")

    # Load the renv package, installing it on first use if R cannot find it.
    try:
        robjects.r("library(renv)")
    except Exception:
        logger.info("[Info] renv package not found in R. Attempting to install...")
        robjects.r('install.packages("renv", repos="https://cloud.r-project.org")')
        # Try loading again after installation; a failure here propagates.
        robjects.r("library(renv)")

    # Load the renv environment using renv::load(path).
    try:
        r_home = robjects.r("R.home()")[0]
        logger.info(f"Using R at: {r_home}")
        robjects.r(f'renv::load("{renv_project_dir.as_posix()}")')
        logger.info(f"renv environment loaded for project: {renv_project_dir}")
    except Exception as e:
        # Chain the original error so the R-side traceback is not lost.
        raise RuntimeError(f"[Error] Failed to load renv environment: {e}") from e

    lib_paths = robjects.r(".libPaths()")
    logger.debug(f".libPaths(): {lib_paths}")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# %%
|
|
107
|
+
class RFunctionCaller:
|
|
108
|
+
"""
|
|
109
|
+
A utility class to load and execute R functions from a specified R script using rpy2.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
def __init__(self, path_to_renv: Path | None, script_path: Path):
|
|
113
|
+
"""
|
|
114
|
+
Initialize the RFunctionCaller with the path to the renv environment and the R script.
|
|
115
|
+
Set path_to_renv to None if no renv is used.
|
|
116
|
+
"""
|
|
117
|
+
if not script_path.exists():
|
|
118
|
+
raise FileNotFoundError(f"R script not found: {script_path}")
|
|
119
|
+
|
|
120
|
+
self.path_to_renv = path_to_renv.resolve() if path_to_renv else None
|
|
121
|
+
|
|
122
|
+
self.script_path = script_path.resolve()
|
|
123
|
+
self.script_dir = self.script_path.parent
|
|
124
|
+
|
|
125
|
+
self._load_script()
|
|
126
|
+
|
|
127
|
+
def _load_script(self):
|
|
128
|
+
"""
|
|
129
|
+
Set the R working directory and source the R script.
|
|
130
|
+
"""
|
|
131
|
+
if self.path_to_renv:
|
|
132
|
+
activate_renv(self.path_to_renv)
|
|
133
|
+
else:
|
|
134
|
+
logger.info("No renv path provided; using base or current environment.")
|
|
135
|
+
|
|
136
|
+
# Set the working directory to the script's directory
|
|
137
|
+
robjects.r(f'setwd("{self.script_dir.as_posix()}")')
|
|
138
|
+
robjects.r(f'source("{self.script_path.as_posix()}")')
|
|
139
|
+
logger.info("R script sourced: {}", self.script_path.name)
|
|
140
|
+
|
|
141
|
+
def call(self, function_name: str, *args: object, **kwargs: object) -> object:
|
|
142
|
+
"""
|
|
143
|
+
Call an R function from the sourced script, and recursively convert &
|
|
144
|
+
post-process the result.
|
|
145
|
+
|
|
146
|
+
Handles:
|
|
147
|
+
- Direct data.frame
|
|
148
|
+
- NamedList or ListVector
|
|
149
|
+
- Nested lists with data.frames inside
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
def _recursive_postprocess(obj):
|
|
153
|
+
# Handle single DataFrame
|
|
154
|
+
if isinstance(obj, pd.DataFrame):
|
|
155
|
+
return postprocess_r_dataframe(obj)
|
|
156
|
+
|
|
157
|
+
# Handle dictionary (e.g. NamedList converted)
|
|
158
|
+
elif isinstance(obj, dict):
|
|
159
|
+
return {k: _recursive_postprocess(v) for k, v in obj.items()}
|
|
160
|
+
|
|
161
|
+
# Handle list of items
|
|
162
|
+
elif isinstance(obj, list):
|
|
163
|
+
return [_recursive_postprocess(item) for item in obj]
|
|
164
|
+
|
|
165
|
+
return obj # Primitive values stay as-is
|
|
166
|
+
|
|
167
|
+
try:
|
|
168
|
+
r_func = robjects.globalenv[function_name]
|
|
169
|
+
|
|
170
|
+
with localconverter(robjects.default_converter + pandas2ri.converter):
|
|
171
|
+
r_args = [robjects.conversion.py2rpy(arg) for arg in args]
|
|
172
|
+
r_kwargs = {k: robjects.conversion.py2rpy(v) for k, v in kwargs.items()}
|
|
173
|
+
result = r_func(*r_args, **r_kwargs)
|
|
174
|
+
|
|
175
|
+
# Step 1: Try direct conversion
|
|
176
|
+
with localconverter(robjects.default_converter + pandas2ri.converter):
|
|
177
|
+
py_result = robjects.conversion.rpy2py(result)
|
|
178
|
+
|
|
179
|
+
# Step 2: If it's still an R container, convert it
|
|
180
|
+
if isinstance(py_result, (NamedList, ListVector)):
|
|
181
|
+
py_result = r_namedlist_to_dict(py_result)
|
|
182
|
+
|
|
183
|
+
# Step 3: Recursively process any nested frames
|
|
184
|
+
return replace_r_na(_recursive_postprocess(py_result))
|
|
185
|
+
|
|
186
|
+
except KeyError:
|
|
187
|
+
raise ValueError(f"Function '{function_name}' not found in the R script.")
|
|
188
|
+
except Exception as e:
|
|
189
|
+
raise RuntimeError(f"Error calling R function '{function_name}': {e}")
|
|
190
|
+
|
|
191
|
+
@classmethod
|
|
192
|
+
def from_github(
|
|
193
|
+
cls,
|
|
194
|
+
repo: str,
|
|
195
|
+
file_path: str,
|
|
196
|
+
ref: str = "main",
|
|
197
|
+
token: Optional[str] = None,
|
|
198
|
+
cache_dir: Optional[Path] = None,
|
|
199
|
+
path_to_renv: Optional[Path] = None,
|
|
200
|
+
trust_remote_code: bool = False,
|
|
201
|
+
require_token: bool = False,
|
|
202
|
+
) -> "RFunctionCaller | Path":
|
|
203
|
+
"""
|
|
204
|
+
Download an R script from a GitHub repository and construct an RFunctionCaller.
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
repo: repository in the form "owner/repo".
|
|
208
|
+
file_path: path to the R script inside the repo (e.g. "scripts/my.R").
|
|
209
|
+
ref: branch name, tag or commit SHA. Defaults to "main".
|
|
210
|
+
token: optional GitHub token for private repos. If None, looks at
|
|
211
|
+
environment variables `GITHUB_TOKEN` or `GH_TOKEN`.
|
|
212
|
+
cache_dir: optional directory to cache downloaded files. Defaults to
|
|
213
|
+
`~/.cache/rpy-bridge`.
|
|
214
|
+
path_to_renv: optional path to renv or project directory to use.
|
|
215
|
+
trust_remote_code: MUST be True to execute remote code. If False,
|
|
216
|
+
the function will only return the local cached path.
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
If `trust_remote_code` is True, returns an `RFunctionCaller` instance
|
|
220
|
+
ready to call functions from the downloaded script. Otherwise returns
|
|
221
|
+
the `Path` to the cached script so the caller can inspect it first.
|
|
222
|
+
"""
|
|
223
|
+
raise NotImplementedError(
|
|
224
|
+
"RFunctionCaller.from_github was removed. Clone repositories locally and pass a local script_path to RFunctionCaller instead."
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# %%
|
|
229
|
+
def r_namedlist_to_dict(namedlist: object) -> object:
    """
    Recursively turn an rpy2 object into plain Python containers.

    - ``NamedList`` / ``ListVector`` with names -> ``dict`` keyed by ``str(name)``,
      each value converted recursively. Entries whose name is ``None``, R NULL
      or stringifies to an empty string are left out of the result.
    - ``NamedList`` / ``ListVector`` whose names are R NULL -> ``list`` of
      recursively converted values.
    - ``StrVector`` / ``IntVector`` / ``FloatVector`` / ``BoolVector`` -> ``dict``
      when the vector has names, otherwise a ``list`` of its elements.
    - Anything else is handed to the rpy2 converter (default + pandas2ri); if
      that conversion raises, the object is returned unchanged.
    """

    def _names_of(r_obj):
        # `names` is read as an attribute, and called when it turns out to be callable.
        found = r_obj.names
        return found() if callable(found) else found

    # --- list-like R containers -------------------------------------------
    if isinstance(namedlist, (NamedList, ListVector)):
        labels = _names_of(namedlist)

        if isinstance(labels, NULLType):
            # No names at all: keep positional order in a plain list.
            return [r_namedlist_to_dict(item) for item in namedlist]

        converted = {}
        for label, item in zip(labels, namedlist):
            if label is None or isinstance(label, NULLType):
                continue
            text = str(label)
            if text:
                converted[text] = r_namedlist_to_dict(item)
        return converted

    # --- atomic vectors (e.g. c(a = 1, b = 2)) -------------------------------
    if isinstance(namedlist, (StrVector, IntVector, FloatVector, BoolVector)):
        labels = _names_of(namedlist)
        if isinstance(labels, NULLType):
            return list(namedlist)
        return {
            str(label): element
            for label, element in zip(labels, list(namedlist))
            if label is not None and not isinstance(label, NULLType)
        }

    # --- everything else: let rpy2 try (data.frames, tibbles, ...) ----------
    with localconverter(robjects.default_converter + pandas2ri.converter):
        try:
            return robjects.conversion.rpy2py(namedlist)
        except Exception:
            # Best effort: hand back the raw R object when no converter applies.
            return namedlist
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# %%
|
|
282
|
+
def clean_r_dataframe(r_df: object) -> object:
    """
    Strip the ``.groups`` and ``.rows`` entries from ``r_df.attr`` in place.

    Each entry is deleted independently; a missing entry (``KeyError``) or an
    object without a usable ``attr`` mapping (``AttributeError``) is ignored.

    Returns:
        The same ``r_df`` object that was passed in.
    """
    for attribute_name in (".groups", ".rows"):
        try:
            del r_df.attr[attribute_name]
        except (KeyError, AttributeError):
            # Nothing to remove for this name; move on to the next one.
            continue
    return r_df
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# %%
|
|
296
|
+
def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return ``df`` with textual missing-value markers replaced by ``pd.NA``.

    The markers are matched exactly (case-sensitive): ``"nan"``, ``"NaN"``,
    ``"NA"``, ``"na"`` and the empty string. The result comes from
    ``DataFrame.replace``, so the input frame itself is not modified.
    """
    na_markers = ["nan", "NaN", "NA", "na", ""]
    return df.replace(to_replace=na_markers, value=pd.NA)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# %%
|
|
302
|
+
def replace_r_na(obj: object) -> object:
    """
    Swap R's ``NA_Character`` for ``np.nan`` inside DataFrames, dicts and lists.

    - ``pd.DataFrame``: values equal to ``ro.NA_Character`` are replaced via
      ``DataFrame.replace`` (non-regex); returned unchanged when ``ro`` has
      no ``NA_Character`` attribute.
    - ``dict`` / ``list``: rebuilt with every value / item processed recursively.
    - The ``ro.NA_Character`` object itself becomes ``np.nan``.
    - Any other object is returned as-is.
    """
    if isinstance(obj, pd.DataFrame):
        if not hasattr(ro, "NA_Character"):
            return obj
        return obj.replace({ro.NA_Character: np.nan}, regex=False)

    if isinstance(obj, dict):
        return {key: replace_r_na(val) for key, val in obj.items()}

    if isinstance(obj, list):
        return [replace_r_na(element) for element in obj]

    # Scalar case: identity check against the R NA sentinel.
    if hasattr(ro, "NA_Character") and obj is ro.NA_Character:
        return np.nan

    return obj
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# %%
|
|
324
|
+
def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalise missing-value markers and column dtypes of one DataFrame.

    Steps:
      1. Replace ``""``, ``"nan"``, ``"NaN"``, ``"NA"`` and ``"na"`` with ``pd.NA``
         (this produces a new frame; the argument is not modified).
      2. For every object-dtype column, coerce to numeric with
         ``errors="coerce"`` and keep the coerced column when at least half of
         the non-missing values converted. Values that did not convert become
         NaN in that case.
      3. Cast integer-dtype columns that contain missing values to ``float64``.

    Returns:
        The normalised DataFrame.
    """
    df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)

    for name in df.columns:
        column = df[name]

        # Object columns: adopt the numeric version if enough values parse.
        if pd.api.types.is_object_dtype(column):
            as_numeric = pd.to_numeric(column, errors="coerce")
            parsed_count = as_numeric.notna().sum()
            present_count = column.notna().sum()
            if parsed_count >= present_count * 0.5:
                df[name] = as_numeric

        # Integer columns holding NA are widened to float64.
        current = df[name]
        if pd.api.types.is_integer_dtype(current) and current.isna().any():
            df[name] = current.astype("float64")

    return df
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# %%
|
|
346
|
+
def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
    """
    Post-process a DataFrame converted from R via rpy2.

    For every column, in this order:
      1. Integer columns: R's ``NA_integer_`` sentinel (-2147483648) is
         replaced with a missing value.
      2. Numeric columns whose non-missing values all lie in [10000, 40000]
         are interpreted as days since 1970-01-01 and converted to datetime.
         This is a heuristic: any numeric column entirely inside that range
         is converted, whether or not it came from an R ``Date``.
      3. Timezone-aware datetime columns are made timezone-naive.

    The column is re-read after each step so that a later step sees the
    result of the earlier one (for example, a date column containing the
    NA sentinel is still recognised as a date once the sentinel is masked).

    Note:
        Columns are assigned on ``df`` itself, so the caller's DataFrame is
        modified in place; pass a copy to keep the original untouched.

    Returns:
        The same DataFrame object, after modification.
    """
    r_na_integer = -2147483648

    for col in df.columns:
        series = df[col]

        # Step 1: mask R's NA_integer_ sentinel.
        if pd.api.types.is_integer_dtype(series):
            is_sentinel = series == r_na_integer
            if is_sentinel.any():
                df[col] = series.mask(is_sentinel, pd.NA)
                # Re-read: the masked column is what the next checks must see.
                series = df[col]

        # Step 2: day counts since the Unix epoch -> datetime.
        if pd.api.types.is_numeric_dtype(series):
            values = series.dropna()
            if not values.empty and values.between(10000, 40000).all():
                try:
                    df[col] = pd.to_datetime("1970-01-01") + pd.to_timedelta(series, unit="D")
                    series = df[col]
                except Exception:
                    # Best effort: leave the column numeric if conversion fails.
                    pass

        # Step 3: drop timezone information (e.g. POSIXct with tz).
        if isinstance(series.dtype, pd.DatetimeTZDtype):
            df[col] = series.dt.tz_localize(None)

    return df
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# %%
|
|
379
|
+
def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run the standard clean-up pipeline on a DataFrame that came from R.

    Applies, in order, ``fix_r_dataframe_types``, ``fix_string_nans`` and
    ``normalize_single_df_dtypes``. Afterwards, an object-dtype index whose
    labels convert to the integers 1..n (R-style row names) is replaced by a
    0-based ``RangeIndex``; any other index is left as it is.
    """
    df = normalize_single_df_dtypes(fix_string_nans(fix_r_dataframe_types(df)))

    if df.index.dtype != object:
        return df

    try:
        numeric_labels = df.index.astype(int)
        one_based = np.arange(len(df)) + 1
        if (numeric_labels == one_based).all():
            df.index = pd.RangeIndex(start=0, stop=len(df))
    except Exception:
        pass  # leave index as-is if not convertible

    return df
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# Note: GitHub fetch helpers were removed to keep the API focused on
|
|
396
|
+
# local script invocation. If you need to run remote scripts, clone the
|
|
397
|
+
# repository locally and pass the local `script_path` to `RFunctionCaller`.
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
# %%
|
|
401
|
+
# -------------------------------------------
|
|
402
|
+
# Functions here onwards are utility functions
|
|
403
|
+
# for comparing R and Python DataFrames.
|
|
404
|
+
# -------------------------------------------
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Align column dtypes across two DataFrames for accurate comparison.

    For every column present in both frames:
      - Empty strings are replaced with ``pd.NA``.
      - If one side is numeric and the other is object, both are coerced to
        numeric with ``errors="coerce"`` (unparseable values become NaN).
      - If both sides are numeric, both are cast to ``float64``.
      - Otherwise, if either side is object dtype, non-missing values on both
        sides are converted to ``str`` while missing values stay missing
        (as ``pd.NA``). They are deliberately not turned into the literal
        strings ``"None"`` / ``"nan"`` / ``"<NA>"``, which would make
        ``None`` and ``NaN`` compare as different values.

    Note:
        Columns are assigned on the frames that are passed in; pass copies if
        the originals must stay untouched.

    Returns:
        ``(df1, df2)`` — the same two objects, after modification.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_object = pd.api.types.is_object_dtype

    def _stringify_keep_missing(column: pd.Series) -> pd.Series:
        # Stringify values but restore missing entries afterwards.
        as_text = column.astype(str).astype(object)
        return as_text.where(column.notna(), pd.NA)

    for col in df1.columns.intersection(df2.columns):
        # Replace empty strings with NA
        df1[col] = df1[col].replace("", pd.NA)
        df2[col] = df2[col].replace("", pd.NA)

        s1, s2 = df1[col], df2[col]
        dtype1, dtype2 = s1.dtype, s2.dtype

        # One numeric, one object: try coercing both to numeric.
        if (is_numeric(dtype1) and is_object(dtype2)) or (is_object(dtype1) and is_numeric(dtype2)):
            try:
                numeric1 = pd.to_numeric(s1, errors="coerce")
                numeric2 = pd.to_numeric(s2, errors="coerce")
            except (TypeError, ValueError, OverflowError):
                pass  # fall through to the string handling below
            else:
                # Assign only after both conversions succeeded, so a failure
                # never leaves one frame converted and the other not.
                df1[col] = numeric1
                df2[col] = numeric2
                continue

        # Both numeric (e.g. int vs float): unify to float64.
        if is_numeric(dtype1) and is_numeric(dtype2):
            df1[col] = df1[col].astype("float64")
            df2[col] = df2[col].astype("float64")
            continue

        # Either side object: compare as strings, keeping missing values missing.
        if is_object(dtype1) or is_object(dtype2):
            df1[col] = _stringify_keep_missing(df1[col])
            df2[col] = _stringify_keep_missing(df2[col])

    return df1, df2
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
# %%
|
|
448
|
+
def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Give shared columns matching numeric dtypes so they can be compared.

    For every column present in both frames, empty strings are replaced with
    ``pd.NA`` and both sides are coerced with ``pd.to_numeric(errors="coerce")``.
    When at least one side contains a real number after coercion, both sides
    are stored as ``float64``; otherwise the NA-normalised originals are
    stored back.

    Note:
        The coercion is lossy: once a column is treated as numeric, every
        value that does not parse as a number becomes NaN on both sides.
        Columns are assigned on the frames that are passed in.

    Returns:
        ``(df1, df2)`` — the same two objects, after modification.
    """
    shared_columns = df1.columns.intersection(df2.columns)

    for name in shared_columns:
        # Empty strings would otherwise force type promotion.
        left = df1[name].replace("", pd.NA)
        right = df2[name].replace("", pd.NA)

        try:
            left_numeric = pd.to_numeric(left, errors="coerce")
            right_numeric = pd.to_numeric(right, errors="coerce")

            both_all_nan = left_numeric.isna().all() and right_numeric.isna().all()
            if not both_all_nan:
                df1[name] = left_numeric.astype("float64")
                df2[name] = right_numeric.astype("float64")
                continue  # column handled numerically
        except Exception:
            pass

        # Not numeric on either side: keep the NA-normalised values.
        df1[name] = left
        df2[name] = right

    return df1, df2
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
# %%
|
|
482
|
+
def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8) -> dict:
    """
    Compare a Python DataFrame (df1) with an R DataFrame converted to pandas (df2).

    Both inputs are preprocessed on copies (R type fixes for ``df2``, string
    NA replacement, dtype normalisation and numeric alignment), so the
    caller's frames are not modified. Shape, column and index mismatches are
    reported with a printed warning and a flag; values are then compared
    column by column over the shared columns, aligned on the index.

    Args:
        df1: DataFrame produced on the Python side.
        df2: DataFrame produced by R and converted to pandas.
        float_tol: absolute tolerance passed to ``np.isclose`` as ``atol``.
            ``np.isclose`` is called with its default relative tolerance,
            so that also applies.

    Returns:
        dict with mismatch diagnostics, preserving original indices in diffs:
          - ``"shape_mismatch"``: True when the shapes differ.
          - ``"columns_mismatch"``: True when the column sets differ.
          - ``"index_mismatch"``: True when the row indexes differ.
          - ``"numeric_diffs"``: ``{column: DataFrame}`` with ``df1`` / ``df2``
            values for numeric entries that are not close.
          - ``"non_numeric_diffs"``: the same for non-numeric columns;
            positions where both sides are missing count as equal.
    """

    results = {
        "shape_mismatch": False,
        "columns_mismatch": False,
        "index_mismatch": False,
        "numeric_diffs": {},
        "non_numeric_diffs": {},
    }

    # --- Preprocessing: fix R-specific issues ---
    # fix_r_dataframe_types assigns columns on the frame it is given, so hand
    # it a copy to keep the caller's df2 unchanged.
    df2 = fix_r_dataframe_types(df2.copy())

    # --- Replace common string NAs with proper pd.NA ---
    df1 = fix_string_nans(df1)
    df2 = fix_string_nans(df2)

    # --- Normalize and align dtypes ---
    df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
    df1, df2 = align_numeric_dtypes(df1, df2)

    # --- Check shape ---
    if df1.shape != df2.shape:
        results["shape_mismatch"] = True
        print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")

    # --- Check columns ---
    if set(df1.columns) != set(df2.columns):
        results["columns_mismatch"] = True
        print("[Warning] Column mismatch:")
        print(f"  df1: {df1.columns}")
        print(f"  df2: {df2.columns}")
        common_cols = df1.columns.intersection(df2.columns)
    else:
        common_cols = df1.columns

    # --- Check index ---
    if not df1.index.equals(df2.index):
        results["index_mismatch"] = True
        print("[Warning] Index mismatch: row labels of df1 and df2 differ")

    # --- Ensure columns are the same order ---
    df1_aligned = df1.loc[:, common_cols]
    df2_aligned = df2.loc[:, common_cols]

    # --- Compare values column by column ---
    for col in common_cols:
        col_py = df1_aligned[col]
        col_r = df2_aligned[col]

        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
            col_py, col_r = col_py.align(col_r)

            close = np.isclose(
                col_py.fillna(np.nan),
                col_r.fillna(np.nan),
                atol=float_tol,
                equal_nan=True,
            )
            if not close.all():
                diffs = pd.DataFrame(
                    {
                        "df1": col_py[~close],
                        "df2": col_r[~close],
                    }
                )
                results["numeric_diffs"][col] = diffs

        else:
            # Treat missing values as equal: create mask where values differ excluding matching NAs
            unequal = ~col_py.eq(col_r)
            both_na = col_py.isna() & col_r.isna()
            unequal = unequal & ~both_na

            if unequal.any():
                diffs = pd.DataFrame(
                    {
                        "df1": col_py[unequal],
                        "df2": col_r[unequal],
                    }
                )
                results["non_numeric_diffs"][col] = diffs

    return results
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
# %%
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rpy-bridge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bridge helpers for calling R from Python via rpy2
|
|
5
|
+
Author-email: Victoria Cheung <victoriakcheung@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Victoria Cheung
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Acknowledgement: This project builds on work originally developed at
|
|
29
|
+
Revolution Medicines and interfaces with the rpy2 project, which is licensed
|
|
30
|
+
under the GNU General Public License version 2 or later.
|
|
31
|
+
|
|
32
|
+
Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
|
|
33
|
+
Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
40
|
+
Requires-Python: >=3.11
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: numpy>=1.24
|
|
44
|
+
Requires-Dist: pandas>=2.0
|
|
45
|
+
Requires-Dist: rpy2>=3.5
|
|
46
|
+
Requires-Dist: loguru>=0.7
|
|
47
|
+
Requires-Dist: ipykernel>=7.1.0
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# rpy-bridge
|
|
51
|
+
|
|
52
|
+
Utilities for calling R code from Python using `rpy2`. It provides a small
|
|
53
|
+
wrapper that can (optionally) activate an `renv` project, source an R
|
|
54
|
+
script, call functions from that script, and post-process results into
|
|
55
|
+
well-typed pandas `DataFrame` objects.
|
|
56
|
+
|
|
57
|
+
This project was developed for bilingual teams where some functions are
|
|
58
|
+
authored in R and the primary consumer is a Python-centric developer. It
|
|
59
|
+
acts as an interoperability layer so a Python programmer can call and reuse
|
|
60
|
+
R functions (written and maintained by R authors) without reimplementing
|
|
61
|
+
that logic in Python.
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
Prerequisites
|
|
66
|
+
|
|
67
|
+
- System R installed and available on `PATH` (rpy2 requires a working R
|
|
68
|
+
installation).
|
|
69
|
+
- Python 3.11+
|
|
70
|
+
|
|
71
|
+
Installation
|
|
72
|
+
|
|
73
|
+
Install from PyPI or as an editable local package during development:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# From PyPI (recommended for consumers)
|
|
77
|
+
python3 -m pip install rpy-bridge
|
|
78
|
+
|
|
79
|
+
# During development (install editable from local source)
|
|
80
|
+
python3 -m pip install -e .
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Required Python packages (the installer will pull these in):
|
|
84
|
+
|
|
85
|
+
- `rpy2` (GPLv2 or later)
|
|
86
|
+
- `pandas`
|
|
87
|
+
- `numpy`
|
|
88
|
+
|
|
89
|
+
## Usage
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from pathlib import Path
|
|
93
|
+
|
|
94
|
+
from rpy_bridge import RFunctionCaller
|
|
95
|
+
|
|
96
|
+
caller = RFunctionCaller(
|
|
97
|
+
path_to_renv=Path("/path/to/project"),
|
|
98
|
+
script_path=Path("/path/to/script.R"),
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
summary_df = caller.call("summarize_cohort", cohort_df)
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
## Examples
|
|
105
|
+
|
|
106
|
+
Basic — run a local R script
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from pathlib import Path
|
|
110
|
+
from rpy_bridge import RFunctionCaller
|
|
111
|
+
|
|
112
|
+
# If your project uses renv, pass the project directory (parent of renv/)
|
|
113
|
+
project_dir = Path("/path/to/your-r-project")
|
|
114
|
+
script = project_dir / "scripts" / "example.R"
|
|
115
|
+
|
|
116
|
+
# If you do not use renv, pass None for path_to_renv
|
|
117
|
+
caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
|
|
118
|
+
result = caller.call("some_function", 42, named_arg="value")
|
|
119
|
+
print(type(result))
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Notes:
|
|
123
|
+
|
|
124
|
+
`path_to_renv` may be either the project directory (containing `renv/`) or
|
|
125
|
+
the `renv/` directory itself. When provided, `RFunctionCaller` will call
|
|
126
|
+
`renv::load()` so the R session uses the project's library versions. If
|
|
127
|
+
`path_to_renv` is `None`, `rpy-bridge` will use whatever R environment is
|
|
128
|
+
visible to the Python process (system R or an R environment you activated
|
|
129
|
+
before starting Python).
|
|
130
|
+
|
|
131
|
+
The intended workflow is:
|
|
132
|
+
|
|
133
|
+
- Clone or download the R script into your local filesystem (review the
|
|
134
|
+
code if it came from a remote source).
|
|
135
|
+
- Construct an `RFunctionCaller` with `script_path` pointing to the local
|
|
136
|
+
script and optionally `path_to_renv` to activate the project's R library.
|
|
137
|
+
|
|
138
|
+
This keeps network, token, and SSL concerns outside the package while
|
|
139
|
+
preserving an easy path for Python-first users to call R-written functions.
|
|
140
|
+
|
|
141
|
+
If you need to run an R script from a remote repository, clone or download
|
|
142
|
+
the script locally, review it, and then construct an `RFunctionCaller`
|
|
143
|
+
pointing at the local `script_path`. This keeps network, token, and SSL
|
|
144
|
+
concerns outside the package and avoids environment-specific failures.
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from pathlib import Path

from rpy_bridge import RFunctionCaller
|
|
148
|
+
|
|
149
|
+
project_dir = Path("/path/to/cloned/repo")
|
|
150
|
+
script = project_dir / "scripts" / "analysis.R"
|
|
151
|
+
|
|
152
|
+
caller = RFunctionCaller(path_to_renv=None, script_path=script)
|
|
153
|
+
result = caller.call("analyze", some_arg=42)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## R Setup
|
|
157
|
+
|
|
158
|
+
If you plan to execute R code with `rpy-bridge`, use the helper scripts in
|
|
159
|
+
`examples/r-deps/` to prepare an R environment.
|
|
160
|
+
|
|
161
|
+
- On macOS (Homebrew) install system deps:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
bash examples/r-deps/install_r_dev_deps_homebrew.sh
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
- Initialize a project `renv` (run in an R session):
|
|
168
|
+
|
|
169
|
+
```r
|
|
170
|
+
source("examples/r-deps/setup_env.R")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
- Restore the environment on a new machine:
|
|
174
|
+
|
|
175
|
+
```r
|
|
176
|
+
renv::restore()
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Review the scripts in `examples/r-deps/` before running; they install system
|
|
180
|
+
libraries and R packages and should be run from a trusted environment. For
|
|
181
|
+
CI, use `r-lib/actions/setup-r` to install R, then run the `Rscript` command
|
|
182
|
+
(for example `Rscript examples/r-deps/setup_env.R`) to prepare the `renv` environment.
|
|
183
|
+
|
|
184
|
+
## Collaboration note
|
|
185
|
+
|
|
186
|
+
This repository provides example R setup scripts for teams working across
|
|
187
|
+
Python and R. Each project may require different R packages — check the
|
|
188
|
+
package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for
|
|
189
|
+
project-specific reproducibility.
|
|
190
|
+
|
|
191
|
+
Clone repositories containing R scripts locally or use your
|
|
192
|
+
preferred tooling to obtain scripts before execution.
|
|
193
|
+
|
|
194
|
+
## Licensing
|
|
195
|
+
|
|
196
|
+
- `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
|
|
197
|
+
- The project depends on [`rpy2`](https://rpy2.github.io) which is licensed
|
|
198
|
+
under the GNU General Public License v2 (or later). Distributing binaries that
|
|
199
|
+
bundle `rpy2` must comply with the GPL terms. When you install `rpy-bridge`
|
|
200
|
+
as a dependency, `rpy2` is resolved directly from its upstream maintainers.
|
|
201
|
+
|
|
202
|
+
### Thanks
|
|
203
|
+
|
|
204
|
+
This package was spun out of internal tooling at Revolution Medicines.
|
|
205
|
+
Many thanks to the team there for allowing the code to be open sourced.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
README.rst
|
|
4
|
+
pyproject.toml
|
|
5
|
+
src/rpy_bridge/__init__.py
|
|
6
|
+
src/rpy_bridge/py.typed
|
|
7
|
+
src/rpy_bridge/rpy2_utils.py
|
|
8
|
+
src/rpy_bridge.egg-info/PKG-INFO
|
|
9
|
+
src/rpy_bridge.egg-info/SOURCES.txt
|
|
10
|
+
src/rpy_bridge.egg-info/dependency_links.txt
|
|
11
|
+
src/rpy_bridge.egg-info/requires.txt
|
|
12
|
+
src/rpy_bridge.egg-info/top_level.txt
|
|
13
|
+
tests/test_wrapper.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
rpy_bridge
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import pytest
|
|
3
|
+
|
|
4
|
+
from rpy_bridge import RFunctionCaller
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_missing_script_raises():
    """Constructing RFunctionCaller with a nonexistent script_path raises FileNotFoundError."""
    missing_script = Path("/does/not/exist.R")
    with pytest.raises(FileNotFoundError):
        RFunctionCaller(path_to_renv=None, script_path=missing_script)
|