rpy-bridge 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rpy_bridge-0.5.0/PKG-INFO +297 -0
- rpy_bridge-0.5.0/README.md +238 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/pyproject.toml +1 -1
- rpy_bridge-0.5.0/src/rpy_bridge/__init__.py +14 -0
- rpy_bridge-0.5.0/src/rpy_bridge/compare.py +106 -0
- rpy_bridge-0.5.0/src/rpy_bridge/convert.py +63 -0
- rpy_bridge-0.5.0/src/rpy_bridge/core.py +505 -0
- rpy_bridge-0.5.0/src/rpy_bridge/dataframe.py +74 -0
- rpy_bridge-0.5.0/src/rpy_bridge/env.py +108 -0
- rpy_bridge-0.5.0/src/rpy_bridge/logging.py +50 -0
- rpy_bridge-0.5.0/src/rpy_bridge/renv.py +149 -0
- rpy_bridge-0.5.0/src/rpy_bridge/rpy2_loader.py +71 -0
- rpy_bridge-0.5.0/src/rpy_bridge.egg-info/PKG-INFO +297 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/src/rpy_bridge.egg-info/SOURCES.txt +8 -1
- rpy_bridge-0.4.0/PKG-INFO +0 -258
- rpy_bridge-0.4.0/README.md +0 -199
- rpy_bridge-0.4.0/src/rpy_bridge/__init__.py +0 -38
- rpy_bridge-0.4.0/src/rpy_bridge/rpy2_utils.py +0 -1221
- rpy_bridge-0.4.0/src/rpy_bridge.egg-info/PKG-INFO +0 -258
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/LICENSE +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/README.rst +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/setup.cfg +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/src/rpy_bridge/py.typed +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/src/rpy_bridge.egg-info/dependency_links.txt +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/src/rpy_bridge.egg-info/requires.txt +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/src/rpy_bridge.egg-info/top_level.txt +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/tests/test_package_call.py +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/tests/test_py2r.py +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/tests/test_roundtrip.py +0 -0
- {rpy_bridge-0.4.0 → rpy_bridge-0.5.0}/tests/test_wrapper.py +0 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rpy-bridge
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
|
|
5
|
+
Author-email: Victoria Cheung <victoriakcheung@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Victoria Cheung
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Acknowledgement: This project builds on work originally developed at
|
|
29
|
+
Revolution Medicines and interfaces with the rpy2 project, which is licensed
|
|
30
|
+
under the GNU General Public License version 2 or later.
|
|
31
|
+
|
|
32
|
+
Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
|
|
33
|
+
Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
|
|
34
|
+
Keywords: python,r,rpy2,python-r,interoperability,data-science,statistics,bioinformatics
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Intended Audience :: Developers
|
|
41
|
+
Classifier: Intended Audience :: Science/Research
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
44
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
45
|
+
Requires-Python: >=3.11
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
License-File: LICENSE
|
|
48
|
+
Requires-Dist: numpy>=1.24
|
|
49
|
+
Requires-Dist: pandas>=2.0
|
|
50
|
+
Requires-Dist: loguru>=0.7
|
|
51
|
+
Provides-Extra: r
|
|
52
|
+
Requires-Dist: rpy2>=3.5; extra == "r"
|
|
53
|
+
Provides-Extra: dev
|
|
54
|
+
Requires-Dist: ipykernel>=7.1.0; extra == "dev"
|
|
55
|
+
Provides-Extra: docs
|
|
56
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
57
|
+
Requires-Dist: myst-parser; extra == "docs"
|
|
58
|
+
Dynamic: license-file
|
|
59
|
+
|
|
60
|
+
# rpy-bridge
|
|
61
|
+
|
|
62
|
+
**rpy-bridge** is a Python-controlled **R execution orchestrator** that enables
|
|
63
|
+
Python code to run R functions, scripts, and packages with **reproducible
|
|
64
|
+
filesystem and environment semantics**.
|
|
65
|
+
|
|
66
|
+
It is built on top of `rpy2`, but unlike thin wrappers, rpy-bridge stabilizes how
|
|
67
|
+
R code is executed when invoked from Python: project roots are inferred, `renv`
|
|
68
|
+
environments can be activated out-of-tree, relative paths behave as expected,
|
|
69
|
+
and return values are normalized for safe Python consumption.
|
|
70
|
+
|
|
71
|
+
This makes rpy-bridge suitable for production pipelines, CI, and bilingual
|
|
72
|
+
Python/R teams where R code must run reliably outside an interactive R session.
|
|
73
|
+
|
|
74
|
+
**Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## What this is (and is not)
|
|
79
|
+
|
|
80
|
+
rpy-bridge **is not a thin rpy2 wrapper**.
|
|
81
|
+
|
|
82
|
+
Typical rpy2 usage assumes:
|
|
83
|
+
- the Python working directory is the R project root
|
|
84
|
+
- `renv` lives next to the executing script
|
|
85
|
+
- relative paths resolve correctly by default
|
|
86
|
+
- all R code executes in `globalenv()`
|
|
87
|
+
|
|
88
|
+
These assumptions break quickly in real-world Python workflows.
|
|
89
|
+
|
|
90
|
+
rpy-bridge instead provides a **controlled R runtime** with explicit guarantees
|
|
91
|
+
around execution context, filesystem behavior, and environment activation.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Core capabilities
|
|
96
|
+
|
|
97
|
+
### 1. R execution orchestration
|
|
98
|
+
|
|
99
|
+
- Embeds R via `rpy2` with deterministic startup behavior
|
|
100
|
+
- Disables interactive and GUI-dependent hooks for headless execution
|
|
101
|
+
- Loads R scripts into isolated namespaces (not `globalenv()`)
|
|
102
|
+
|
|
103
|
+
### 2. Project root inference and path stability
|
|
104
|
+
|
|
105
|
+
- Infers R project roots using markers such as:
|
|
106
|
+
`.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`
|
|
107
|
+
- Executes R code from the inferred project root regardless of Python CWD
|
|
108
|
+
- Preserves relative-path behavior expected by R scripts
|
|
109
|
+
- Supports R code using `here::here()` or project-local data
|
|
110
|
+
|
|
111
|
+
### 3. Out-of-tree `renv` activation
|
|
112
|
+
|
|
113
|
+
- Activates `renv` projects located **outside** the calling Python directory
|
|
114
|
+
- Sources `.Rprofile` and `.Renviron` to reproduce R startup semantics
|
|
115
|
+
- Does not require R scripts and `renv` to live in the same directory
|
|
116
|
+
|
|
117
|
+
### 4. Python ↔ R data conversion
|
|
118
|
+
|
|
119
|
+
- Converts Python scalars, lists, dicts, and pandas objects into R equivalents
|
|
120
|
+
- Converts R vectors, lists, and data.frames back into Python-native types
|
|
121
|
+
- Handles nested structures, missing values, and mixed types robustly
|
|
122
|
+
|
|
123
|
+
### 5. Data normalization and diagnostics
|
|
124
|
+
|
|
125
|
+
- Post-processes R data.frames to fix dtype, timezone, and NA semantics
|
|
126
|
+
- Normalizes column types for reliable Python-side comparison
|
|
127
|
+
- Supports structured mismatch diagnostics between Python and R data
|
|
128
|
+
|
|
129
|
+
### 6. Function invocation across scripts and packages
|
|
130
|
+
|
|
131
|
+
- Calls functions defined in sourced R scripts, base R, or installed packages
|
|
132
|
+
- Supports qualified function names (e.g. `stats::median`)
|
|
133
|
+
- Executes functions within the active project and library context
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Calling base R functions and managing packages
|
|
138
|
+
|
|
139
|
+
In addition to sourcing local R scripts, rpy-bridge supports calling functions
|
|
140
|
+
from base R and installed packages directly from Python.
|
|
141
|
+
|
|
142
|
+
Current support includes:
|
|
143
|
+
|
|
144
|
+
- Calling base R functions without a local R script
|
|
145
|
+
- Executing functions from installed R packages within the active environment
|
|
146
|
+
|
|
147
|
+
Planned extensions (roadmap):
|
|
148
|
+
|
|
149
|
+
- Programmatic installation of R packages into the active `renv` or system
|
|
150
|
+
environment when explicitly enabled
|
|
151
|
+
- Declarative package requirements at the Python call site
|
|
152
|
+
- Safe, opt-in package installation for CI and ephemeral environments
|
|
153
|
+
|
|
154
|
+
Package installation is intentionally **not automatic by default** to preserve
|
|
155
|
+
reproducibility and avoid side effects during execution.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Installation
|
|
160
|
+
|
|
161
|
+
### Prerequisites
|
|
162
|
+
|
|
163
|
+
- System R installed and available on `PATH`
|
|
164
|
+
- Python 3.11+
|
|
165
|
+
|
|
166
|
+
### From PyPI
|
|
167
|
+
|
|
168
|
+
Install rpy-bridge with rpy2 for full R support:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
python3 -m pip install rpy-bridge rpy2
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Using `uv`:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
uv add rpy-bridge rpy2
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Development install
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
python3 -m pip install -e .
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
or:
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
uv sync
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Required Python dependencies
|
|
193
|
+
|
|
194
|
+
- `rpy2`
|
|
195
|
+
- `pandas`
|
|
196
|
+
- `numpy`
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Usage
|
|
201
|
+
|
|
202
|
+
### Call a function from a local R script
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from pathlib import Path
|
|
206
|
+
from rpy_bridge import RFunctionCaller
|
|
207
|
+
|
|
208
|
+
project_dir = Path("/path/to/your-r-project")
|
|
209
|
+
script = project_dir / "scripts" / "example.R"
|
|
210
|
+
|
|
211
|
+
caller = RFunctionCaller(
|
|
212
|
+
path_to_renv=project_dir,
|
|
213
|
+
script_path=script,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
result = caller.call("some_function", 42, named_arg="value")
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Call base R functions (no local script)
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
from rpy_bridge import RFunctionCaller
|
|
223
|
+
|
|
224
|
+
caller = RFunctionCaller(path_to_renv=None)
|
|
225
|
+
|
|
226
|
+
samples = caller.call("stats::rnorm", 10, mean=0, sd=1)
|
|
227
|
+
median_val = caller.call("stats::median", samples)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## Round-trip Python ↔ R behavior
|
|
233
|
+
|
|
234
|
+
rpy-bridge attempts to convert Python objects to R and back. Most objects used in
|
|
235
|
+
scientific and ML pipelines round-trip cleanly, but some heterogeneous Python
|
|
236
|
+
structures may be wrapped or slightly altered due to differences in R’s type
|
|
237
|
+
system.
|
|
238
|
+
|
|
239
|
+
| Python type | Round-trip fidelity | Notes |
|
|
240
|
+
| ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
|
|
241
|
+
| `int`, `float`, `bool`, `str` | High | Scalars convert directly |
|
|
242
|
+
| Homogeneous `list` of numbers/strings | High | Converted to atomic R vectors |
|
|
243
|
+
| Nested homogeneous lists | High | Converted to nested R lists |
|
|
244
|
+
| `pandas.DataFrame` / `pd.Series` | High | Converted to `data.frame` and normalized on return |
|
|
245
|
+
| Mixed-type `list` or `dict` | Partial | May be wrapped in single-element vectors |
|
|
246
|
+
| `None` / `pd.NA` | High | Converted to R `NULL` |
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## R setup helpers
|
|
251
|
+
|
|
252
|
+
Helper scripts are provided in `examples/r-deps/` to prepare R environments.
|
|
253
|
+
|
|
254
|
+
- Install system R dependencies (macOS / Homebrew):
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
bash examples/r-deps/install_r_dev_deps_homebrew.sh
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
- Initialize an `renv` project:
|
|
261
|
+
|
|
262
|
+
```r
|
|
263
|
+
source("examples/r-deps/setup_env.R")
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
- Restore the environment on a new machine:
|
|
267
|
+
|
|
268
|
+
```r
|
|
269
|
+
renv::restore()
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## Who this is for
|
|
275
|
+
|
|
276
|
+
rpy-bridge is designed for:
|
|
277
|
+
|
|
278
|
+
- Python-first pipelines that rely on mature R code
|
|
279
|
+
- Teams where R logic must remain authoritative
|
|
280
|
+
- CI or production systems that cannot rely on interactive R sessions
|
|
281
|
+
- Multi-repo or multi-directory projects with non-trivial filesystem layouts
|
|
282
|
+
|
|
283
|
+
It is **not** intended as a convenience wrapper for exploratory R usage.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## Licensing
|
|
288
|
+
|
|
289
|
+
- rpy-bridge is released under the MIT License © 2025 Victoria Cheung
|
|
290
|
+
- Depends on [`rpy2`](https://rpy2.github.io), licensed under the GNU GPL (v2 or later)
|
|
291
|
+
|
|
292
|
+
---
|
|
293
|
+
|
|
294
|
+
## Acknowledgements
|
|
295
|
+
|
|
296
|
+
This package was spun out of internal tooling I wrote at Revolution Medicines.
|
|
297
|
+
Thanks to the team there for supporting its open-source release.
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# rpy-bridge
|
|
2
|
+
|
|
3
|
+
**rpy-bridge** is a Python-controlled **R execution orchestrator** that enables
|
|
4
|
+
Python code to run R functions, scripts, and packages with **reproducible
|
|
5
|
+
filesystem and environment semantics**.
|
|
6
|
+
|
|
7
|
+
It is built on top of `rpy2`, but unlike thin wrappers, rpy-bridge stabilizes how
|
|
8
|
+
R code is executed when invoked from Python: project roots are inferred, `renv`
|
|
9
|
+
environments can be activated out-of-tree, relative paths behave as expected,
|
|
10
|
+
and return values are normalized for safe Python consumption.
|
|
11
|
+
|
|
12
|
+
This makes rpy-bridge suitable for production pipelines, CI, and bilingual
|
|
13
|
+
Python/R teams where R code must run reliably outside an interactive R session.
|
|
14
|
+
|
|
15
|
+
**Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## What this is (and is not)
|
|
20
|
+
|
|
21
|
+
rpy-bridge **is not a thin rpy2 wrapper**.
|
|
22
|
+
|
|
23
|
+
Typical rpy2 usage assumes:
|
|
24
|
+
- the Python working directory is the R project root
|
|
25
|
+
- `renv` lives next to the executing script
|
|
26
|
+
- relative paths resolve correctly by default
|
|
27
|
+
- all R code executes in `globalenv()`
|
|
28
|
+
|
|
29
|
+
These assumptions break quickly in real-world Python workflows.
|
|
30
|
+
|
|
31
|
+
rpy-bridge instead provides a **controlled R runtime** with explicit guarantees
|
|
32
|
+
around execution context, filesystem behavior, and environment activation.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Core capabilities
|
|
37
|
+
|
|
38
|
+
### 1. R execution orchestration
|
|
39
|
+
|
|
40
|
+
- Embeds R via `rpy2` with deterministic startup behavior
|
|
41
|
+
- Disables interactive and GUI-dependent hooks for headless execution
|
|
42
|
+
- Loads R scripts into isolated namespaces (not `globalenv()`)
|
|
43
|
+
|
|
44
|
+
### 2. Project root inference and path stability
|
|
45
|
+
|
|
46
|
+
- Infers R project roots using markers such as:
|
|
47
|
+
`.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`
|
|
48
|
+
- Executes R code from the inferred project root regardless of Python CWD
|
|
49
|
+
- Preserves relative-path behavior expected by R scripts
|
|
50
|
+
- Supports R code using `here::here()` or project-local data
|
|
51
|
+
|
|
52
|
+
### 3. Out-of-tree `renv` activation
|
|
53
|
+
|
|
54
|
+
- Activates `renv` projects located **outside** the calling Python directory
|
|
55
|
+
- Sources `.Rprofile` and `.Renviron` to reproduce R startup semantics
|
|
56
|
+
- Does not require R scripts and `renv` to live in the same directory
|
|
57
|
+
|
|
58
|
+
### 4. Python ↔ R data conversion
|
|
59
|
+
|
|
60
|
+
- Converts Python scalars, lists, dicts, and pandas objects into R equivalents
|
|
61
|
+
- Converts R vectors, lists, and data.frames back into Python-native types
|
|
62
|
+
- Handles nested structures, missing values, and mixed types robustly
|
|
63
|
+
|
|
64
|
+
### 5. Data normalization and diagnostics
|
|
65
|
+
|
|
66
|
+
- Post-processes R data.frames to fix dtype, timezone, and NA semantics
|
|
67
|
+
- Normalizes column types for reliable Python-side comparison
|
|
68
|
+
- Supports structured mismatch diagnostics between Python and R data
|
|
69
|
+
|
|
70
|
+
### 6. Function invocation across scripts and packages
|
|
71
|
+
|
|
72
|
+
- Calls functions defined in sourced R scripts, base R, or installed packages
|
|
73
|
+
- Supports qualified function names (e.g. `stats::median`)
|
|
74
|
+
- Executes functions within the active project and library context
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Calling base R functions and managing packages
|
|
79
|
+
|
|
80
|
+
In addition to sourcing local R scripts, rpy-bridge supports calling functions
|
|
81
|
+
from base R and installed packages directly from Python.
|
|
82
|
+
|
|
83
|
+
Current support includes:
|
|
84
|
+
|
|
85
|
+
- Calling base R functions without a local R script
|
|
86
|
+
- Executing functions from installed R packages within the active environment
|
|
87
|
+
|
|
88
|
+
Planned extensions (roadmap):
|
|
89
|
+
|
|
90
|
+
- Programmatic installation of R packages into the active `renv` or system
|
|
91
|
+
environment when explicitly enabled
|
|
92
|
+
- Declarative package requirements at the Python call site
|
|
93
|
+
- Safe, opt-in package installation for CI and ephemeral environments
|
|
94
|
+
|
|
95
|
+
Package installation is intentionally **not automatic by default** to preserve
|
|
96
|
+
reproducibility and avoid side effects during execution.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Installation
|
|
101
|
+
|
|
102
|
+
### Prerequisites
|
|
103
|
+
|
|
104
|
+
- System R installed and available on `PATH`
|
|
105
|
+
- Python 3.11+
|
|
106
|
+
|
|
107
|
+
### From PyPI
|
|
108
|
+
|
|
109
|
+
Install rpy-bridge with rpy2 for full R support:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
python3 -m pip install rpy-bridge rpy2
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Using `uv`:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
uv add rpy-bridge rpy2
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Development install
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
python3 -m pip install -e .
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
or:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
uv sync
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Required Python dependencies
|
|
134
|
+
|
|
135
|
+
- `rpy2`
|
|
136
|
+
- `pandas`
|
|
137
|
+
- `numpy`
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Usage
|
|
142
|
+
|
|
143
|
+
### Call a function from a local R script
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from pathlib import Path
|
|
147
|
+
from rpy_bridge import RFunctionCaller
|
|
148
|
+
|
|
149
|
+
project_dir = Path("/path/to/your-r-project")
|
|
150
|
+
script = project_dir / "scripts" / "example.R"
|
|
151
|
+
|
|
152
|
+
caller = RFunctionCaller(
|
|
153
|
+
path_to_renv=project_dir,
|
|
154
|
+
script_path=script,
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
result = caller.call("some_function", 42, named_arg="value")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Call base R functions (no local script)
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
from rpy_bridge import RFunctionCaller
|
|
164
|
+
|
|
165
|
+
caller = RFunctionCaller(path_to_renv=None)
|
|
166
|
+
|
|
167
|
+
samples = caller.call("stats::rnorm", 10, mean=0, sd=1)
|
|
168
|
+
median_val = caller.call("stats::median", samples)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Round-trip Python ↔ R behavior
|
|
174
|
+
|
|
175
|
+
rpy-bridge attempts to convert Python objects to R and back. Most objects used in
|
|
176
|
+
scientific and ML pipelines round-trip cleanly, but some heterogeneous Python
|
|
177
|
+
structures may be wrapped or slightly altered due to differences in R’s type
|
|
178
|
+
system.
|
|
179
|
+
|
|
180
|
+
| Python type | Round-trip fidelity | Notes |
|
|
181
|
+
| ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
|
|
182
|
+
| `int`, `float`, `bool`, `str` | High | Scalars convert directly |
|
|
183
|
+
| Homogeneous `list` of numbers/strings | High | Converted to atomic R vectors |
|
|
184
|
+
| Nested homogeneous lists | High | Converted to nested R lists |
|
|
185
|
+
| `pandas.DataFrame` / `pd.Series` | High | Converted to `data.frame` and normalized on return |
|
|
186
|
+
| Mixed-type `list` or `dict` | Partial | May be wrapped in single-element vectors |
|
|
187
|
+
| `None` / `pd.NA` | High | Converted to R `NULL` |
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## R setup helpers
|
|
192
|
+
|
|
193
|
+
Helper scripts are provided in `examples/r-deps/` to prepare R environments.
|
|
194
|
+
|
|
195
|
+
- Install system R dependencies (macOS / Homebrew):
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
bash examples/r-deps/install_r_dev_deps_homebrew.sh
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
- Initialize an `renv` project:
|
|
202
|
+
|
|
203
|
+
```r
|
|
204
|
+
source("examples/r-deps/setup_env.R")
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
- Restore the environment on a new machine:
|
|
208
|
+
|
|
209
|
+
```r
|
|
210
|
+
renv::restore()
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Who this is for
|
|
216
|
+
|
|
217
|
+
rpy-bridge is designed for:
|
|
218
|
+
|
|
219
|
+
- Python-first pipelines that rely on mature R code
|
|
220
|
+
- Teams where R logic must remain authoritative
|
|
221
|
+
- CI or production systems that cannot rely on interactive R sessions
|
|
222
|
+
- Multi-repo or multi-directory projects with non-trivial filesystem layouts
|
|
223
|
+
|
|
224
|
+
It is **not** intended as a convenience wrapper for exploratory R usage.
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Licensing
|
|
229
|
+
|
|
230
|
+
- rpy-bridge is released under the MIT License © 2025 Victoria Cheung
|
|
231
|
+
- Depends on [`rpy2`](https://rpy2.github.io), licensed under the GNU GPL (v2 or later)
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Acknowledgements
|
|
236
|
+
|
|
237
|
+
This package was spun out of internal tooling I wrote at Revolution Medicines.
|
|
238
|
+
Thanks to the team there for supporting its open-source release.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "rpy-bridge"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.5.0"
|
|
4
4
|
description = "Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { file = "LICENSE" }
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Public API for the rpy-bridge package.
|
|
3
|
+
|
|
4
|
+
`RFunctionCaller` is the primary entry point for loading R scripts and calling
|
|
5
|
+
functions. Other helpers are re-exported for compatibility.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .core import RFunctionCaller
|
|
9
|
+
from .renv import activate_renv
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"activate_renv",
|
|
13
|
+
"RFunctionCaller",
|
|
14
|
+
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DataFrame comparison helpers used to validate parity between R and Python outputs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from .dataframe import fix_r_dataframe_types, fix_string_nans
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Bring the columns shared by two frames to comparable dtypes.

    For every column label present in both frames:

    * empty strings are replaced with ``pd.NA``;
    * a numeric column paired with an object column is parsed on both sides
      with ``pd.to_numeric(errors="coerce")``;
    * two numeric columns are both cast to ``float64``;
    * otherwise, if either side has object dtype, both sides are converted to
      text, while missing values are kept missing.

    Columns that exist in only one frame are left untouched.

    Args:
        df1: First frame. Modified in place.
        df2: Second frame. Modified in place.

    Returns:
        The same two frame objects, ``(df1, df2)``, after normalization. Pass
        copies if the originals must stay unchanged.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_object = pd.api.types.is_object_dtype

    for col in df1.columns.intersection(df2.columns):
        df1[col] = df1[col].replace("", pd.NA)
        df2[col] = df2[col].replace("", pd.NA)
        s1, s2 = df1[col], df2[col]
        dtype1, dtype2 = s1.dtype, s2.dtype

        if (is_numeric(dtype1) and is_object(dtype2)) or (is_object(dtype1) and is_numeric(dtype2)):
            try:
                parsed1 = pd.to_numeric(s1, errors="coerce")
                parsed2 = pd.to_numeric(s2, errors="coerce")
            except Exception:
                # Deliberately broad: this is a best-effort coercion. On any
                # failure, fall through to the text branch below.
                pass
            else:
                # Assign only after both parses succeeded so the two frames
                # never end up half-converted.
                df1[col], df2[col] = parsed1, parsed2
                continue

        if is_numeric(dtype1) and is_numeric(dtype2):
            df1[col] = s1.astype("float64")
            df2[col] = s2.astype("float64")
            continue

        if is_object(dtype1) or is_object(dtype2):
            # A plain ``astype(str)`` would turn missing values into literal
            # text such as "nan", "None" or "<NA>", so two missing cells could
            # compare as different strings. Restore them as ``pd.NA``.
            df1[col] = s1.astype(str).mask(s1.isna(), pd.NA)
            df2[col] = s2.astype(str).mask(s2.isna(), pd.NA)

    return df1, df2
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _to_float_if_lossless(series: pd.Series) -> pd.Series | None:
    """Return *series* as ``float64``, or ``None`` if parsing would drop data.

    A value counts as dropped when it is not missing in *series* but becomes
    NaN after ``pd.to_numeric(errors="coerce")``.
    """
    # Text spellings that ``Series.astype(str)`` emits for missing values;
    # they are treated as missing rather than as unparseable text.
    missing_text = ("nan", "None", "<NA>", "NaT")
    try:
        parsed = pd.to_numeric(series, errors="coerce")
        absent = series.isna() | series.astype(str).isin(missing_text)
        if (parsed.isna() & ~absent).any():
            return None
        return parsed.astype("float64")
    except Exception:
        # Deliberately broad: this is best-effort. A column that cannot be
        # parsed or cast is simply reported as not convertible.
        return None


def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Cast shared columns to ``float64`` when both sides are fully numeric.

    For every column label present in both frames, empty strings are replaced
    with ``pd.NA``. The column is then converted to ``float64`` in both frames
    only if

    * every non-missing value on both sides parses as a number, and
    * at least one side has at least one parsed value.

    Otherwise both columns keep their values, with empty strings replaced.
    Requiring a lossless parse stops text such as ``"x"`` from being coerced
    to NaN, which would hide real differences between the frames.

    Args:
        df1: First frame. Modified in place.
        df2: Second frame. Modified in place.

    Returns:
        The same two frame objects, ``(df1, df2)``.
    """
    for col in df1.columns.intersection(df2.columns):
        s1 = df1[col].replace("", pd.NA)
        s2 = df2[col].replace("", pd.NA)
        f1 = _to_float_if_lossless(s1)
        f2 = _to_float_if_lossless(s2)
        both_parse = f1 is not None and f2 is not None
        if both_parse and (f1.notna().any() or f2.notna().any()):
            df1[col], df2[col] = f1, f2
        else:
            df1[col], df2[col] = s1, s2
    return df1, df2
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8) -> dict:
    """Compare two frames and report where they differ.

    ``df2`` is first passed through ``fix_r_dataframe_types``; both frames
    then go through ``fix_string_nans``, ``normalize_dtypes`` and
    ``align_numeric_dtypes`` (the last two on copies). Shape, column and index
    mismatches are flagged and printed as warnings, but the comparison still
    runs on the columns the frames share.

    Args:
        df1: First frame; its values appear under ``"df1"`` in the diffs.
        df2: Second frame; its values appear under ``"df2"`` in the diffs.
        float_tol: Absolute tolerance passed to ``np.isclose`` as ``atol``.
            ``rtol`` is not passed, so NumPy's default relative tolerance
            also applies.

    Returns:
        A dict with these keys:

        * ``"shape_mismatch"``: True if the shapes differ.
        * ``"columns_mismatch"``: True if the sets of column labels differ.
        * ``"index_mismatch"``: True if the indexes differ.
        * ``"numeric_diffs"``: column label -> DataFrame of the differing
          ``df1``/``df2`` values, for columns numeric on both sides.
        * ``"non_numeric_diffs"``: the same, for all other shared columns.
    """
    results: dict[str, Any] = {
        "shape_mismatch": False,
        "columns_mismatch": False,
        "index_mismatch": False,
        "numeric_diffs": {},
        "non_numeric_diffs": {},
    }

    df2 = fix_r_dataframe_types(df2)
    df1 = fix_string_nans(df1)
    df2 = fix_string_nans(df2)
    # The dtype helpers assign columns in place, so hand them copies.
    df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
    df1, df2 = align_numeric_dtypes(df1, df2)

    if df1.shape != df2.shape:
        results["shape_mismatch"] = True
        print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")

    # Previously this flag was initialised but never updated.
    if not df1.index.equals(df2.index):
        results["index_mismatch"] = True
        print("[Warning] Index mismatch between df1 and df2")

    if set(df1.columns) != set(df2.columns):
        results["columns_mismatch"] = True
        print("[Warning] Column mismatch:")
        print(f"  df1: {df1.columns}")
        print(f"  df2: {df2.columns}")
        common_cols = df1.columns.intersection(df2.columns)
    else:
        common_cols = df1.columns

    df1_aligned, df2_aligned = df1.loc[:, common_cols], df2.loc[:, common_cols]

    for col in common_cols:
        col_py, col_r = df1_aligned[col], df2_aligned[col]
        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
            # Align on the index so np.isclose compares equal-length arrays.
            col_py, col_r = col_py.align(col_r)
            close = np.isclose(
                col_py.fillna(np.nan),
                col_r.fillna(np.nan),
                atol=float_tol,
                equal_nan=True,
            )
            if not close.all():
                results["numeric_diffs"][col] = pd.DataFrame(
                    {"df1": col_py[~close], "df2": col_r[~close]}
                )
        else:
            unequal = ~col_py.eq(col_r)
            # Two missing cells never compare equal; do not report them.
            both_na = col_py.isna() & col_r.isna()
            unequal = unequal & ~both_na
            if unequal.any():
                results["non_numeric_diffs"][col] = pd.DataFrame(
                    {"df1": col_py[unequal], "df2": col_r[unequal]}
                )

    return results
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
__all__ = ["normalize_dtypes", "align_numeric_dtypes", "compare_r_py_dataframes"]
|