rpy-bridge 0.1.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: rpy-bridge
3
+ Version: 0.3.1
4
+ Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
5
+ Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Victoria Cheung
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Acknowledgement: This project builds on work originally developed at
29
+ Revolution Medicines and interfaces with the rpy2 project, which is licensed
30
+ under the GNU General Public License version 2 or later.
31
+
32
+ Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
33
+ Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
+ Requires-Python: >=3.11
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: numpy>=1.24
44
+ Requires-Dist: pandas>=2.0
45
+ Requires-Dist: loguru>=0.7
46
+ Provides-Extra: r
47
+ Requires-Dist: rpy2>=3.5; extra == "r"
48
+ Provides-Extra: dev
49
+ Requires-Dist: ipykernel>=7.1.0; extra == "dev"
50
+ Provides-Extra: docs
51
+ Requires-Dist: sphinx; extra == "docs"
52
+ Requires-Dist: myst-parser; extra == "docs"
53
+ Dynamic: license-file
54
+
55
+ # rpy-bridge
56
+
57
+ rpy-bridge is a robust Python-to-R interoperability engine that combines environment management, type-safe conversions, data normalization, and safe function execution to make Python-R collaboration seamless.
58
+
59
+ It enables Python developers to call R functions, scripts, and packages safely while preserving type fidelity and project-specific R environments. This is ideal for bilingual teams where R authors maintain core logic, and Python-centric users need reliable access without rewriting code.
60
+
61
+ **Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
62
+
63
+ ---
64
+
65
+ ## Key layers and capabilities
66
+
67
+ ### 1. Lazy and robust R integration
68
+
69
+ - Automatically detects or sets R_HOME and ensures rpy2 is installed.
70
+ - Configures platform-specific dynamic library paths for macOS/Linux.
71
+
72
+ ### 2. Environment management
73
+
74
+ - Activates the renv project and loads its project-specific libraries if one exists; otherwise uses the current environment.
75
+ - Sources .Renviron and .Rprofile files to replicate the R project environment in Python.
76
+
77
+ ### 3. Python ↔ R type conversion
78
+
79
+ - Converts Python scalars, lists, dicts, and pandas DataFrames into appropriate R objects.
80
+ - Converts R atomic vectors, ListVector/NamedList, and data.frames back into Python-native objects.
81
+ - Handles nested structures, mixed types, and missing values robustly (NA_* → None/pd.NA).
82
+
83
+ ### 4. Data hygiene and normalization
84
+
85
+ - Post-processes R DataFrames: fixes dtypes, numeric/date conversions, and timezone issues.
86
+ - Normalizes and aligns column types for accurate Python comparisons.
87
+ - Supports comparing Python and R DataFrames with mismatch diagnostics.
88
+
89
+ ### 5. Function calling
90
+
91
+ - Calls functions from R scripts, base R, or installed packages safely.
92
+ - Automatically converts arguments and return values, including keyword arguments.
93
+ - Supports mixed data types, nested structures, and DataFrames seamlessly.
94
+
95
+ ### 6. Python-first workflow for R code
96
+
97
+ - Enables Python developers to reuse R functions without needing deep R knowledge.
98
+ - Keeps network, token, and SSL concerns outside the package when sourcing scripts locally.
99
+ - Designed for reproducibility and safe execution in CI or cross-platform environments.
100
+
101
+ ---
102
+
103
+ ## Installation
104
+
105
+ **Prerequisites**
106
+
107
+ - System R installed and available on `PATH` (rpy2 requires a working R installation).
108
+ - Python 3.11+
109
+
110
+ **From PyPI:**
111
+
112
+ ```bash
113
+ python3 -m pip install rpy-bridge
114
+ ```
115
+
116
+ or using `uv`:
117
+
118
+ ```bash
119
+ uv add rpy-bridge
120
+ ```
121
+
122
+ **During development (editable install):**
123
+
124
+ ```bash
125
+ python3 -m pip install -e .
126
+ ```
127
+
128
+ or using `uv`:
129
+
130
+ ```bash
131
+ uv sync
132
+ ```
133
+
134
+ **Required Python packages** (the installer pulls in `pandas` and `numpy`; `rpy2` comes from the optional `r` extra, e.g. `pip install "rpy-bridge[r]"`):
135
+
136
+ - `rpy2` (GPLv2 or later)
137
+ - `pandas`
138
+ - `numpy`
139
+
140
+ ---
141
+
142
+ ## Usage
143
+
144
+ ```python
145
+ from pathlib import Path
146
+ from rpy_bridge import RFunctionCaller
147
+
148
+ caller = RFunctionCaller(
149
+ path_to_renv=Path("/path/to/project"),
150
+ script_path=Path("/path/to/script.R"),
151
+ )
152
+
153
+ summary_df = caller.call("summarize_cohort", cohort_df)
154
+ ```
155
+
156
+ ---
157
+
158
+ ## Round-trip Python ↔ R behavior
159
+
160
+ `rpy-bridge` attempts to convert Python objects to R and back. Most objects used in scientific/ML pipelines round-trip cleanly, but some heterogeneous Python structures may be wrapped or slightly altered. This is normal due to R's type system.
161
+
162
+ | Python type | Round-trip fidelity | Notes |
163
+ | ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
164
+ | `int`, `float`, `bool`, `str` | ✅ High | Scalars convert directly |
165
+ | Homogeneous `list` of numbers/strings/booleans | ✅ High | Converted to atomic R vectors |
166
+ | Nested lists of homogeneous types | ✅ High | Converted to nested R `ListVector` |
167
+ | `pandas.DataFrame` / `pd.Series` | ✅ High | Converted to `data.frame` / R vector, post-processed back |
168
+ | Mixed-type `list` or heterogeneous `dict` | ⚠️ Partial | Elements wrapped in single-element vectors; round-trip may alter type |
169
+ | Python `None` / `pd.NA` | ✅ High | Converted to R `NULL` |
170
+
171
+ ### Guidance
172
+
173
+ - Typical workflows (DataFrames, numeric arrays, series, homogeneous lists) are fully supported.
174
+ - Rare or highly heterogeneous Python objects may not round-trip perfectly.
175
+ - Round-trip fidelity is mainly a “nice-to-have” for debugging. For production pipelines, it’s safe to focus on supported types.
176
+
177
+ ---
178
+
179
+ ## Examples
180
+
181
+ ### Basic — run a local R script
182
+
183
+ ```python
184
+ from pathlib import Path
185
+ from rpy_bridge import RFunctionCaller
186
+
187
+ project_dir = Path("/path/to/your-r-project")
188
+ script = project_dir / "scripts" / "example.R"
189
+
190
+ caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
191
+ result = caller.call("some_function", 42, named_arg="value")
192
+ print(type(result))
193
+ ```
194
+
195
+ ### Call installed R packages (no local script)
196
+
197
+ ```python
198
+ from rpy_bridge import RFunctionCaller
199
+
200
+ caller = RFunctionCaller(path_to_renv=None, packages=["stats"])
201
+ samples = caller.call("rnorm", 5, mean=10)
202
+ print(type(samples)) # typically a numpy.ndarray
203
+
204
+ median_val = caller.call("stats::median", samples)
205
+ print(median_val)
206
+ ```
207
+
208
+ ---
209
+
210
+ ## R Setup
211
+
212
+ If you plan to execute R code with `rpy-bridge`, use the helper scripts in
213
+ `examples/r-deps/` to prepare an R environment.
214
+
215
+ - On macOS (Homebrew) install system deps:
216
+
217
+ ```bash
218
+ bash examples/r-deps/install_r_dev_deps_homebrew.sh
219
+ ```
220
+
221
+ - Initialize a project `renv` (run in an R session):
222
+
223
+ ```r
224
+ source("examples/r-deps/setup_env.R")
225
+ ```
226
+
227
+ - Restore the environment on a new machine:
228
+
229
+ ```r
230
+ renv::restore()
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Collaboration note
236
+
237
+ This repository provides example R setup scripts for teams working across Python and R. Each project may require different R packages — check the package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for project-specific reproducibility.
238
+
239
+ Clone repositories containing R scripts locally or use your preferred tooling to obtain scripts before execution.
240
+
241
+ ---
242
+
243
+ ## Licensing
244
+
245
+ - `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
246
+ - The project depends on [`rpy2`](https://rpy2.github.io), which is licensed under the GNU General Public License v2 (or later).
247
+
248
+ ### Thanks
249
+
250
+ This package was spun out of internal tooling at Revolution Medicines.
251
+ Many thanks to the team there for allowing the code to be open sourced.
@@ -0,0 +1,197 @@
1
+ # rpy-bridge
2
+
3
+ rpy-bridge is a robust Python-to-R interoperability engine that combines environment management, type-safe conversions, data normalization, and safe function execution to make Python-R collaboration seamless.
4
+
5
+ It enables Python developers to call R functions, scripts, and packages safely while preserving type fidelity and project-specific R environments. This is ideal for bilingual teams where R authors maintain core logic, and Python-centric users need reliable access without rewriting code.
6
+
7
+ **Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
8
+
9
+ ---
10
+
11
+ ## Key layers and capabilities
12
+
13
+ ### 1. Lazy and robust R integration
14
+
15
+ - Automatically detects or sets R_HOME and ensures rpy2 is installed.
16
+ - Configures platform-specific dynamic library paths for macOS/Linux.
17
+
18
+ ### 2. Environment management
19
+
20
+ - Activates the renv project and loads its project-specific libraries if one exists; otherwise uses the current environment.
21
+ - Sources .Renviron and .Rprofile files to replicate the R project environment in Python.
22
+
23
+ ### 3. Python ↔ R type conversion
24
+
25
+ - Converts Python scalars, lists, dicts, and pandas DataFrames into appropriate R objects.
26
+ - Converts R atomic vectors, ListVector/NamedList, and data.frames back into Python-native objects.
27
+ - Handles nested structures, mixed types, and missing values robustly (NA_* → None/pd.NA).
28
+
29
+ ### 4. Data hygiene and normalization
30
+
31
+ - Post-processes R DataFrames: fixes dtypes, numeric/date conversions, and timezone issues.
32
+ - Normalizes and aligns column types for accurate Python comparisons.
33
+ - Supports comparing Python and R DataFrames with mismatch diagnostics.
34
+
35
+ ### 5. Function calling
36
+
37
+ - Calls functions from R scripts, base R, or installed packages safely.
38
+ - Automatically converts arguments and return values, including keyword arguments.
39
+ - Supports mixed data types, nested structures, and DataFrames seamlessly.
40
+
41
+ ### 6. Python-first workflow for R code
42
+
43
+ - Enables Python developers to reuse R functions without needing deep R knowledge.
44
+ - Keeps network, token, and SSL concerns outside the package when sourcing scripts locally.
45
+ - Designed for reproducibility and safe execution in CI or cross-platform environments.
46
+
47
+ ---
48
+
49
+ ## Installation
50
+
51
+ **Prerequisites**
52
+
53
+ - System R installed and available on `PATH` (rpy2 requires a working R installation).
54
+ - Python 3.11+
55
+
56
+ **From PyPI:**
57
+
58
+ ```bash
59
+ python3 -m pip install rpy-bridge
60
+ ```
61
+
62
+ or using `uv`:
63
+
64
+ ```bash
65
+ uv add rpy-bridge
66
+ ```
67
+
68
+ **During development (editable install):**
69
+
70
+ ```bash
71
+ python3 -m pip install -e .
72
+ ```
73
+
74
+ or using `uv`:
75
+
76
+ ```bash
77
+ uv sync
78
+ ```
79
+
80
+ **Required Python packages** (the installer pulls in `pandas` and `numpy`; `rpy2` comes from the optional `r` extra, e.g. `pip install "rpy-bridge[r]"`):
81
+
82
+ - `rpy2` (GPLv2 or later)
83
+ - `pandas`
84
+ - `numpy`
85
+
86
+ ---
87
+
88
+ ## Usage
89
+
90
+ ```python
91
+ from pathlib import Path
92
+ from rpy_bridge import RFunctionCaller
93
+
94
+ caller = RFunctionCaller(
95
+ path_to_renv=Path("/path/to/project"),
96
+ script_path=Path("/path/to/script.R"),
97
+ )
98
+
99
+ summary_df = caller.call("summarize_cohort", cohort_df)
100
+ ```
101
+
102
+ ---
103
+
104
+ ## Round-trip Python ↔ R behavior
105
+
106
+ `rpy-bridge` attempts to convert Python objects to R and back. Most objects used in scientific/ML pipelines round-trip cleanly, but some heterogeneous Python structures may be wrapped or slightly altered. This is normal due to R's type system.
107
+
108
+ | Python type | Round-trip fidelity | Notes |
109
+ | ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
110
+ | `int`, `float`, `bool`, `str` | ✅ High | Scalars convert directly |
111
+ | Homogeneous `list` of numbers/strings/booleans | ✅ High | Converted to atomic R vectors |
112
+ | Nested lists of homogeneous types | ✅ High | Converted to nested R `ListVector` |
113
+ | `pandas.DataFrame` / `pd.Series` | ✅ High | Converted to `data.frame` / R vector, post-processed back |
114
+ | Mixed-type `list` or heterogeneous `dict` | ⚠️ Partial | Elements wrapped in single-element vectors; round-trip may alter type |
115
+ | Python `None` / `pd.NA` | ✅ High | Converted to R `NULL` |
116
+
117
+ ### Guidance
118
+
119
+ - Typical workflows (DataFrames, numeric arrays, series, homogeneous lists) are fully supported.
120
+ - Rare or highly heterogeneous Python objects may not round-trip perfectly.
121
+ - Round-trip fidelity is mainly a “nice-to-have” for debugging. For production pipelines, it’s safe to focus on supported types.
122
+
123
+ ---
124
+
125
+ ## Examples
126
+
127
+ ### Basic — run a local R script
128
+
129
+ ```python
130
+ from pathlib import Path
131
+ from rpy_bridge import RFunctionCaller
132
+
133
+ project_dir = Path("/path/to/your-r-project")
134
+ script = project_dir / "scripts" / "example.R"
135
+
136
+ caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
137
+ result = caller.call("some_function", 42, named_arg="value")
138
+ print(type(result))
139
+ ```
140
+
141
+ ### Call installed R packages (no local script)
142
+
143
+ ```python
144
+ from rpy_bridge import RFunctionCaller
145
+
146
+ caller = RFunctionCaller(path_to_renv=None, packages=["stats"])
147
+ samples = caller.call("rnorm", 5, mean=10)
148
+ print(type(samples)) # typically a numpy.ndarray
149
+
150
+ median_val = caller.call("stats::median", samples)
151
+ print(median_val)
152
+ ```
153
+
154
+ ---
155
+
156
+ ## R Setup
157
+
158
+ If you plan to execute R code with `rpy-bridge`, use the helper scripts in
159
+ `examples/r-deps/` to prepare an R environment.
160
+
161
+ - On macOS (Homebrew) install system deps:
162
+
163
+ ```bash
164
+ bash examples/r-deps/install_r_dev_deps_homebrew.sh
165
+ ```
166
+
167
+ - Initialize a project `renv` (run in an R session):
168
+
169
+ ```r
170
+ source("examples/r-deps/setup_env.R")
171
+ ```
172
+
173
+ - Restore the environment on a new machine:
174
+
175
+ ```r
176
+ renv::restore()
177
+ ```
178
+
179
+ ---
180
+
181
+ ## Collaboration note
182
+
183
+ This repository provides example R setup scripts for teams working across Python and R. Each project may require different R packages — check the package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for project-specific reproducibility.
184
+
185
+ Clone repositories containing R scripts locally or use your preferred tooling to obtain scripts before execution.
186
+
187
+ ---
188
+
189
+ ## Licensing
190
+
191
+ - `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
192
+ - The project depends on [`rpy2`](https://rpy2.github.io), which is licensed under the GNU General Public License v2 (or later).
193
+
194
+ ### Thanks
195
+
196
+ This package was spun out of internal tooling at Revolution Medicines.
197
+ Many thanks to the team there for allowing the code to be open sourced.
@@ -1,20 +1,21 @@
1
1
  [project]
2
2
  name = "rpy-bridge"
3
- version = "0.1.0"
4
- description = "Bridge helpers for calling R from Python via rpy2"
3
+ version = "0.3.1"
4
+ description = "Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution."
5
5
  readme = "README.md"
6
6
  license = { file = "LICENSE" }
7
7
  authors = [
8
8
  { name = "Victoria Cheung", email = "victoriakcheung@gmail.com" }
9
9
  ]
10
10
  requires-python = ">=3.11"
11
+
12
+ # Core dependencies: safe to install without system R
11
13
  dependencies = [
12
14
  "numpy>=1.24",
13
15
  "pandas>=2.0",
14
- "rpy2>=3.5",
15
16
  "loguru>=0.7",
16
- "ipykernel>=7.1.0",
17
17
  ]
18
+
18
19
  classifiers = [
19
20
  "License :: OSI Approved :: MIT License",
20
21
  "Programming Language :: Python",
@@ -24,6 +25,23 @@ classifiers = [
24
25
  "Topic :: Scientific/Engineering :: Bio-Informatics",
25
26
  ]
26
27
 
28
+ [project.optional-dependencies]
29
+ # R-backed functionality
30
+ r = [
31
+ "rpy2>=3.5",
32
+ ]
33
+
34
+ # Interactive / notebook usage
35
+ dev = [
36
+ "ipykernel>=7.1.0",
37
+ ]
38
+
39
+ # Documentation build dependencies (optional but recommended)
40
+ docs = [
41
+ "sphinx",
42
+ "myst-parser",
43
+ ]
44
+
27
45
  [project.urls]
28
46
  Homepage = "https://github.com/vic-cheung/rpy-bridge"
29
47
  "Issue Tracker" = "https://github.com/vic-cheung/rpy-bridge/issues"
@@ -16,7 +16,7 @@ from .rpy2_utils import (
16
16
  normalize_dtypes,
17
17
  normalize_single_df_dtypes,
18
18
  postprocess_r_dataframe,
19
- replace_r_na,
19
+ clean_r_missing,
20
20
  r_namedlist_to_dict,
21
21
  )
22
22
 
@@ -26,7 +26,7 @@ __all__ = [
26
26
  "r_namedlist_to_dict",
27
27
  "clean_r_dataframe",
28
28
  "fix_string_nans",
29
- "replace_r_na",
29
+ "clean_r_missing",
30
30
  "normalize_single_df_dtypes",
31
31
  "fix_r_dataframe_types",
32
32
  "postprocess_r_dataframe",