rpy-bridge 0.4.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. rpy_bridge-0.5.1/PKG-INFO +291 -0
  2. rpy_bridge-0.5.1/README.md +226 -0
  3. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/pyproject.toml +8 -14
  4. rpy_bridge-0.5.1/src/rpy_bridge/__init__.py +14 -0
  5. rpy_bridge-0.5.1/src/rpy_bridge/compare.py +106 -0
  6. rpy_bridge-0.5.1/src/rpy_bridge/convert.py +63 -0
  7. rpy_bridge-0.5.1/src/rpy_bridge/core.py +505 -0
  8. rpy_bridge-0.5.1/src/rpy_bridge/dataframe.py +74 -0
  9. rpy_bridge-0.5.1/src/rpy_bridge/env.py +108 -0
  10. rpy_bridge-0.5.1/src/rpy_bridge/logging.py +50 -0
  11. rpy_bridge-0.5.1/src/rpy_bridge/renv.py +149 -0
  12. rpy_bridge-0.5.1/src/rpy_bridge/rpy2_loader.py +71 -0
  13. rpy_bridge-0.5.1/src/rpy_bridge.egg-info/PKG-INFO +291 -0
  14. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/src/rpy_bridge.egg-info/SOURCES.txt +8 -1
  15. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/src/rpy_bridge.egg-info/requires.txt +6 -0
  16. rpy_bridge-0.4.0/PKG-INFO +0 -258
  17. rpy_bridge-0.4.0/README.md +0 -199
  18. rpy_bridge-0.4.0/src/rpy_bridge/__init__.py +0 -38
  19. rpy_bridge-0.4.0/src/rpy_bridge/rpy2_utils.py +0 -1221
  20. rpy_bridge-0.4.0/src/rpy_bridge.egg-info/PKG-INFO +0 -258
  21. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/LICENSE +0 -0
  22. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/README.rst +0 -0
  23. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/setup.cfg +0 -0
  24. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/src/rpy_bridge/py.typed +0 -0
  25. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/src/rpy_bridge.egg-info/dependency_links.txt +0 -0
  26. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/src/rpy_bridge.egg-info/top_level.txt +0 -0
  27. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/tests/test_package_call.py +0 -0
  28. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/tests/test_py2r.py +0 -0
  29. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/tests/test_roundtrip.py +0 -0
  30. {rpy_bridge-0.4.0 → rpy_bridge-0.5.1}/tests/test_wrapper.py +0 -0
@@ -0,0 +1,291 @@
1
+ Metadata-Version: 2.4
2
+ Name: rpy-bridge
3
+ Version: 0.5.1
4
+ Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
5
+ Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Victoria Cheung
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Acknowledgement: This project builds on work originally developed at
29
+ Revolution Medicines and interfaces with the rpy2 project, which is licensed
30
+ under the GNU General Public License version 2 or later.
31
+
32
+ Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
33
+ Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
34
+ Keywords: python,r,rpy2,python-r,interoperability,data-science,statistics,bioinformatics
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Intended Audience :: Developers
41
+ Classifier: Intended Audience :: Science/Research
42
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
43
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
44
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
45
+ Requires-Python: >=3.11
46
+ Description-Content-Type: text/markdown
47
+ License-File: LICENSE
48
+ Requires-Dist: numpy>=1.24
49
+ Requires-Dist: pandas>=2.0
50
+ Requires-Dist: loguru>=0.7
51
+ Provides-Extra: r
52
+ Requires-Dist: rpy2>=3.5; extra == "r"
53
+ Provides-Extra: dev
54
+ Requires-Dist: ipykernel>=7.1.0; extra == "dev"
55
+ Requires-Dist: pytest>=8.0; extra == "dev"
56
+ Requires-Dist: ruff>=0.6; extra == "dev"
57
+ Requires-Dist: build>=1.0; extra == "dev"
58
+ Requires-Dist: twine>=4.0; extra == "dev"
59
+ Requires-Dist: certifi>=2025.0; extra == "dev"
60
+ Requires-Dist: ty>=0.0.1a34; extra == "dev"
61
+ Provides-Extra: docs
62
+ Requires-Dist: sphinx; extra == "docs"
63
+ Requires-Dist: myst-parser; extra == "docs"
64
+ Dynamic: license-file
65
+
66
+ # rpy-bridge
67
+
68
+ **rpy-bridge** is a Python-controlled **R execution orchestrator** (not a thin
69
+ rpy2 wrapper). It delivers deterministic, headless-safe R startup; project-root
70
+ inference; out-of-tree `renv` activation; isolated script namespaces; and robust
71
+ Python↔R conversions with dtype/NA normalization. Use it when you need
72
+ reproducible R execution from Python in production pipelines and CI.
73
+
74
+ **Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
75
+
76
+ ---
77
+
78
+ ## What this is (and is not)
79
+
80
+ rpy-bridge **is not a thin rpy2 wrapper**. Key differences:
81
+
82
+ - Infers R project roots via markers (`.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`)
83
+ - Activates `renv` even when it lives outside the calling directory
84
+ - Executes from the inferred project root so relative paths behave as R expects
85
+ - Runs headless by default (no GUI probing), isolates scripts from `globalenv()`
86
+ - Normalizes return values for Python (NAs, dtypes, data.frames) and offers comparison helpers
87
+
88
+ ---
89
+
90
+ ## Quickstart
91
+
92
+ Call a package function (no scripts):
93
+
94
+ ```python
95
+ from rpy_bridge import RFunctionCaller
96
+
97
+ rfc = RFunctionCaller()
98
+ samples = rfc.call("stats::rnorm", 5, mean=0, sd=1)
99
+ median_val = rfc.call("stats::median", samples)
100
+ ```
101
+
102
+ Call a function from a local script with `renv` (out-of-tree allowed):
103
+
104
+ ```python
105
+ from pathlib import Path
106
+ from rpy_bridge import RFunctionCaller
107
+
108
+ project_dir = Path("/path/to/your-r-project")
109
+ script = project_dir / "scripts" / "example.R"
110
+
111
+ rfc = RFunctionCaller(path_to_renv=project_dir, scripts=script)
112
+ result = rfc.call("some_function", 42, named_arg="value")
113
+ ```
114
+
115
+ ## Core capabilities
116
+
117
+ ### 1. R execution orchestration
118
+
119
+ - Embeds R via `rpy2` with deterministic startup behavior
120
+ - Disables interactive and GUI-dependent hooks for headless execution
121
+ - Loads R scripts into isolated namespaces (not `globalenv()`)
122
+
123
+ ### 2. Project root inference and path stability
124
+
125
+ - Infers R project roots using markers such as:
126
+ `.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`
127
+ - Executes R code from the inferred project root regardless of Python CWD
128
+ - Preserves relative-path behavior expected by R scripts
129
+ - Supports R code using `here::here()` or project-local data
130
+
131
+ ### 3. Out-of-tree `renv` activation
132
+
133
+ - Activates `renv` projects located **outside** the calling Python directory
134
+ - Sources `.Rprofile` and `.Renviron` to reproduce R startup semantics
135
+ - Does not require R scripts and `renv` to live in the same directory
136
+
137
+ ### 4. Python ↔ R data conversion
138
+
139
+ - Converts Python scalars, lists, dicts, and pandas objects into R equivalents
140
+ - Converts R vectors, lists, and data.frames back into Python-native types
141
+ - Handles nested structures, missing values, and mixed types robustly
142
+
143
+ ### 5. Data normalization and diagnostics
144
+
145
+ - Post-processes R data.frames to fix dtype, timezone, and NA semantics
146
+ - Normalizes column types for reliable Python-side comparison
147
+ - Supports structured mismatch diagnostics between Python and R data
148
+
149
+ ### 6. Function invocation across scripts and packages
150
+
151
+ - Calls functions defined in sourced R scripts, base R, or installed packages
152
+ - Supports qualified function names (e.g. `stats::median`)
153
+ - Executes functions within the active project and library context
154
+
155
+ ---
156
+
157
+ ## Calling base R functions and managing packages
158
+
159
+ In addition to sourcing local R scripts, rpy-bridge supports calling functions
160
+ from base R and installed packages directly from Python.
161
+
162
+ Current support includes:
163
+
164
+ - Calling base R functions without a local R script
165
+ - Executing functions from installed R packages within the active environment
166
+
167
+ Planned extensions (roadmap):
168
+
169
+ - Programmatic installation of R packages into the active `renv` or system
170
+ environment when explicitly enabled
171
+ - Declarative package requirements at the Python call site
172
+ - Safe, opt-in package installation for CI and ephemeral environments
173
+
174
+ Package installation is intentionally **not automatic by default** to preserve
175
+ reproducibility and avoid side effects during execution.
176
+
177
+ ---
178
+
179
+ ## Installation
180
+
181
+ ### Prerequisites
182
+
183
+ - System R installed and available on `PATH`
184
+ - Python 3.11+ (tested on 3.11–3.12)
185
+
186
+ ### From PyPI
187
+
188
+ Install rpy-bridge with rpy2 for full R support:
189
+
190
+ ```bash
191
+ python3 -m pip install rpy-bridge rpy2
192
+ ```
193
+
194
+ Using `uv`:
195
+
196
+ ```bash
197
+ uv add rpy-bridge rpy2
198
+ ```
199
+
200
+ ### Development install
201
+
202
+ ```bash
203
+ python3 -m pip install -e .
204
+ ```
205
+
206
+ or:
207
+
208
+ ```bash
209
+ uv sync
210
+ ```
211
+
212
+ ### Required Python dependencies
213
+
214
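+ - `rpy2` (not installed automatically; install it alongside rpy-bridge or via the `r` extra: `pip install "rpy-bridge[r]"`)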
215
+ - `pandas`
216
+ - `numpy`
217
+
218
+ ---
219
+
220
+ ## Usage
221
+
222
+ See Quickstart above and examples in `examples/basic_usage.py`.
223
+
224
+ ---
225
+
226
+ ## Round-trip Python ↔ R behavior
227
+
228
+ rpy-bridge attempts to convert Python objects to R and back. Most objects used in
229
+ scientific and ML pipelines round-trip cleanly, but some heterogeneous Python
230
+ structures may be wrapped or slightly altered due to differences in R’s type
231
+ system.
232
+
233
+ | Python type | Round-trip fidelity | Notes |
234
+ | ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
235
+ | `int`, `float`, `bool`, `str` | High | Scalars convert directly |
236
+ | Homogeneous `list` of numbers/strings | High | Converted to atomic R vectors |
237
+ | Nested homogeneous lists | High | Converted to nested R lists |
238
+ | `pandas.DataFrame` / `pd.Series` | High | Converted to `data.frame` and normalized on return |
239
+ | Mixed-type `list` or `dict` | Partial | May be wrapped in single-element vectors |
240
+ | `None` / `pd.NA` | High | Converted to R `NULL` |
241
+
242
+ ---
243
+
244
+ ## R setup helpers
245
+
246
+ Helper scripts are provided in `examples/r-deps/` to prepare R environments.
247
+
248
+ - Install system R dependencies (macOS / Homebrew):
249
+
250
+ ```bash
251
+ bash examples/r-deps/install_r_dev_deps_homebrew.sh
252
+ ```
253
+
254
+ - Initialize an `renv` project:
255
+
256
+ ```r
257
+ source("examples/r-deps/setup_env.R")
258
+ ```
259
+
260
+ - Restore the environment on a new machine:
261
+
262
+ ```r
263
+ renv::restore()
264
+ ```
265
+
266
+ ---
267
+
268
+ ## Who this is for
269
+
270
+ rpy-bridge is designed for:
271
+
272
+ - Python-first pipelines that rely on mature R code
273
+ - Teams where R logic must remain authoritative
274
+ - CI or production systems that cannot rely on interactive R sessions
275
+ - Multi-repo or multi-directory projects with non-trivial filesystem layouts
276
+
277
+ It is **not** intended as a convenience wrapper for exploratory R usage.
278
+
279
+ ---
280
+
281
+ ## Licensing
282
+
283
+ - rpy-bridge is released under the MIT License © 2025 Victoria Cheung
284
+ - Depends on [`rpy2`](https://rpy2.github.io), licensed under the GNU GPL (v2 or later)
285
+
286
+ ---
287
+
288
+ ## Acknowledgements
289
+
290
+ This package was spun out of internal tooling I wrote at Revolution Medicines.
291
+ Thanks to the team there for supporting its open-source release.
@@ -0,0 +1,226 @@
1
+ # rpy-bridge
2
+
3
+ **rpy-bridge** is a Python-controlled **R execution orchestrator** (not a thin
4
+ rpy2 wrapper). It delivers deterministic, headless-safe R startup; project-root
5
+ inference; out-of-tree `renv` activation; isolated script namespaces; and robust
6
+ Python↔R conversions with dtype/NA normalization. Use it when you need
7
+ reproducible R execution from Python in production pipelines and CI.
8
+
9
+ **Latest release:** [`rpy-bridge` on PyPI](https://pypi.org/project/rpy-bridge/)
10
+
11
+ ---
12
+
13
+ ## What this is (and is not)
14
+
15
+ rpy-bridge **is not a thin rpy2 wrapper**. Key differences:
16
+
17
+ - Infers R project roots via markers (`.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`)
18
+ - Activates `renv` even when it lives outside the calling directory
19
+ - Executes from the inferred project root so relative paths behave as R expects
20
+ - Runs headless by default (no GUI probing), isolates scripts from `globalenv()`
21
+ - Normalizes return values for Python (NAs, dtypes, data.frames) and offers comparison helpers
22
+
23
+ ---
24
+
25
+ ## Quickstart
26
+
27
+ Call a package function (no scripts):
28
+
29
+ ```python
30
+ from rpy_bridge import RFunctionCaller
31
+
32
+ rfc = RFunctionCaller()
33
+ samples = rfc.call("stats::rnorm", 5, mean=0, sd=1)
34
+ median_val = rfc.call("stats::median", samples)
35
+ ```
36
+
37
+ Call a function from a local script with `renv` (out-of-tree allowed):
38
+
39
+ ```python
40
+ from pathlib import Path
41
+ from rpy_bridge import RFunctionCaller
42
+
43
+ project_dir = Path("/path/to/your-r-project")
44
+ script = project_dir / "scripts" / "example.R"
45
+
46
+ rfc = RFunctionCaller(path_to_renv=project_dir, scripts=script)
47
+ result = rfc.call("some_function", 42, named_arg="value")
48
+ ```
49
+
50
+ ## Core capabilities
51
+
52
+ ### 1. R execution orchestration
53
+
54
+ - Embeds R via `rpy2` with deterministic startup behavior
55
+ - Disables interactive and GUI-dependent hooks for headless execution
56
+ - Loads R scripts into isolated namespaces (not `globalenv()`)
57
+
58
+ ### 2. Project root inference and path stability
59
+
60
+ - Infers R project roots using markers such as:
61
+ `.git`, `.Rproj`, `renv.lock`, `DESCRIPTION`, `.here`
62
+ - Executes R code from the inferred project root regardless of Python CWD
63
+ - Preserves relative-path behavior expected by R scripts
64
+ - Supports R code using `here::here()` or project-local data
65
+
66
+ ### 3. Out-of-tree `renv` activation
67
+
68
+ - Activates `renv` projects located **outside** the calling Python directory
69
+ - Sources `.Rprofile` and `.Renviron` to reproduce R startup semantics
70
+ - Does not require R scripts and `renv` to live in the same directory
71
+
72
+ ### 4. Python ↔ R data conversion
73
+
74
+ - Converts Python scalars, lists, dicts, and pandas objects into R equivalents
75
+ - Converts R vectors, lists, and data.frames back into Python-native types
76
+ - Handles nested structures, missing values, and mixed types robustly
77
+
78
+ ### 5. Data normalization and diagnostics
79
+
80
+ - Post-processes R data.frames to fix dtype, timezone, and NA semantics
81
+ - Normalizes column types for reliable Python-side comparison
82
+ - Supports structured mismatch diagnostics between Python and R data
83
+
84
+ ### 6. Function invocation across scripts and packages
85
+
86
+ - Calls functions defined in sourced R scripts, base R, or installed packages
87
+ - Supports qualified function names (e.g. `stats::median`)
88
+ - Executes functions within the active project and library context
89
+
90
+ ---
91
+
92
+ ## Calling base R functions and managing packages
93
+
94
+ In addition to sourcing local R scripts, rpy-bridge supports calling functions
95
+ from base R and installed packages directly from Python.
96
+
97
+ Current support includes:
98
+
99
+ - Calling base R functions without a local R script
100
+ - Executing functions from installed R packages within the active environment
101
+
102
+ Planned extensions (roadmap):
103
+
104
+ - Programmatic installation of R packages into the active `renv` or system
105
+ environment when explicitly enabled
106
+ - Declarative package requirements at the Python call site
107
+ - Safe, opt-in package installation for CI and ephemeral environments
108
+
109
+ Package installation is intentionally **not automatic by default** to preserve
110
+ reproducibility and avoid side effects during execution.
111
+
112
+ ---
113
+
114
+ ## Installation
115
+
116
+ ### Prerequisites
117
+
118
+ - System R installed and available on `PATH`
119
+ - Python 3.11+ (tested on 3.11–3.12)
120
+
121
+ ### From PyPI
122
+
123
+ Install rpy-bridge with rpy2 for full R support:
124
+
125
+ ```bash
126
+ python3 -m pip install rpy-bridge rpy2
127
+ ```
128
+
129
+ Using `uv`:
130
+
131
+ ```bash
132
+ uv add rpy-bridge rpy2
133
+ ```
134
+
135
+ ### Development install
136
+
137
+ ```bash
138
+ python3 -m pip install -e .
139
+ ```
140
+
141
+ or:
142
+
143
+ ```bash
144
+ uv sync
145
+ ```
146
+
147
+ ### Required Python dependencies
148
+
149
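+ - `rpy2` (not installed automatically; install it alongside rpy-bridge or via the `r` extra: `pip install "rpy-bridge[r]"`)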
150
+ - `pandas`
151
+ - `numpy`
152
+
153
+ ---
154
+
155
+ ## Usage
156
+
157
+ See Quickstart above and examples in `examples/basic_usage.py`.
158
+
159
+ ---
160
+
161
+ ## Round-trip Python ↔ R behavior
162
+
163
+ rpy-bridge attempts to convert Python objects to R and back. Most objects used in
164
+ scientific and ML pipelines round-trip cleanly, but some heterogeneous Python
165
+ structures may be wrapped or slightly altered due to differences in R’s type
166
+ system.
167
+
168
+ | Python type | Round-trip fidelity | Notes |
169
+ | ---------------------------------------------- | ------------------- | --------------------------------------------------------------------- |
170
+ | `int`, `float`, `bool`, `str` | High | Scalars convert directly |
171
+ | Homogeneous `list` of numbers/strings | High | Converted to atomic R vectors |
172
+ | Nested homogeneous lists | High | Converted to nested R lists |
173
+ | `pandas.DataFrame` / `pd.Series` | High | Converted to `data.frame` and normalized on return |
174
+ | Mixed-type `list` or `dict` | Partial | May be wrapped in single-element vectors |
175
+ | `None` / `pd.NA` | High | Converted to R `NULL` |
176
+
177
+ ---
178
+
179
+ ## R setup helpers
180
+
181
+ Helper scripts are provided in `examples/r-deps/` to prepare R environments.
182
+
183
+ - Install system R dependencies (macOS / Homebrew):
184
+
185
+ ```bash
186
+ bash examples/r-deps/install_r_dev_deps_homebrew.sh
187
+ ```
188
+
189
+ - Initialize an `renv` project:
190
+
191
+ ```r
192
+ source("examples/r-deps/setup_env.R")
193
+ ```
194
+
195
+ - Restore the environment on a new machine:
196
+
197
+ ```r
198
+ renv::restore()
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Who this is for
204
+
205
+ rpy-bridge is designed for:
206
+
207
+ - Python-first pipelines that rely on mature R code
208
+ - Teams where R logic must remain authoritative
209
+ - CI or production systems that cannot rely on interactive R sessions
210
+ - Multi-repo or multi-directory projects with non-trivial filesystem layouts
211
+
212
+ It is **not** intended as a convenience wrapper for exploratory R usage.
213
+
214
+ ---
215
+
216
+ ## Licensing
217
+
218
+ - rpy-bridge is released under the MIT License © 2025 Victoria Cheung
219
+ - Depends on [`rpy2`](https://rpy2.github.io), licensed under the GNU GPL (v2 or later)
220
+
221
+ ---
222
+
223
+ ## Acknowledgements
224
+
225
+ This package was spun out of internal tooling I wrote at Revolution Medicines.
226
+ Thanks to the team there for supporting its open-source release.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rpy-bridge"
3
- version = "0.4.0"
3
+ version = "0.5.1"
4
4
  description = "Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution."
5
5
  readme = "README.md"
6
6
  license = { file = "LICENSE" }
@@ -41,11 +41,6 @@ classifiers = [
41
41
 
42
42
  ]
43
43
 
44
- [project.optional-dependencies]
45
- r = ["rpy2>=3.5"]
46
- dev = ["ipykernel>=7.1.0"]
47
- docs = ["sphinx", "myst-parser"]
48
-
49
44
  [project.urls]
50
45
  Homepage = "https://github.com/vic-cheung/rpy-bridge"
51
46
  "Issue Tracker" = "https://github.com/vic-cheung/rpy-bridge/issues"
@@ -57,15 +52,14 @@ build-backend = "setuptools.build_meta"
57
52
  # -------------------------------
58
53
  # uv dev dependencies
59
54
  # -------------------------------
55
+ [project.optional-dependencies]
56
+ r = ["rpy2>=3.5"]
57
+ dev = ["ipykernel>=7.1.0", "pytest>=8.0", "ruff>=0.6", "build>=1.0", "twine>=4.0", "certifi>=2025.0", "ty>=0.0.1a34"]
58
+ docs = ["sphinx", "myst-parser"]
59
+
60
60
  [tool.uv]
61
- dev-dependencies = [
62
- "ruff>=0.6",
63
- "pytest>=8.0",
64
- "build>=1.0",
65
- "twine>=4.0",
66
- "certifi>=2025.0",
67
- "ty>=0.0.1a34",
68
- ]
61
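+ # Intended as a meta-extra combining all extras. NOTE: this key lives under [tool.uv], not [project.optional-dependencies], so no `all` extra is published.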
62
+ all = ["r", "dev", "docs"]
69
63
 
70
64
  # -------------------------------
71
65
  # Ruff configuration
@@ -0,0 +1,14 @@
1
+ """
2
+ Public API for the rpy-bridge package.
3
+
4
+ `RFunctionCaller` is the primary entry point for loading R scripts and calling
5
+ functions. Other helpers are re-exported for compatibility.
6
+ """
7
+
8
+ from .core import RFunctionCaller
9
+ from .renv import activate_renv
10
+
11
+ __all__ = [
12
+ "activate_renv",
13
+ "RFunctionCaller",
14
+ ]
@@ -0,0 +1,106 @@
1
+ """
2
+ DataFrame comparison helpers used to validate parity between R and Python outputs.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from .dataframe import fix_r_dataframe_types, fix_string_nans
13
+
14
+
15
+ def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
16
+ for col in df1.columns.intersection(df2.columns):
17
+ df1[col] = df1[col].replace("", pd.NA)
18
+ df2[col] = df2[col].replace("", pd.NA)
19
+ s1, s2 = df1[col], df2[col]
20
+ dtype1, dtype2 = s1.dtype, s2.dtype
21
+ if (pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_object_dtype(dtype2)) or (
22
+ pd.api.types.is_object_dtype(dtype1) and pd.api.types.is_numeric_dtype(dtype2)
23
+ ):
24
+ try:
25
+ df1[col] = pd.to_numeric(s1, errors="coerce")
26
+ df2[col] = pd.to_numeric(s2, errors="coerce")
27
+ continue
28
+ except Exception:
29
+ pass
30
+ if pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_numeric_dtype(dtype2):
31
+ df1[col] = df1[col].astype("float64")
32
+ df2[col] = df2[col].astype("float64")
33
+ continue
34
+ if pd.api.types.is_object_dtype(dtype1) or pd.api.types.is_object_dtype(dtype2):
35
+ df1[col] = df1[col].astype(str)
36
+ df2[col] = df2[col].astype(str)
37
+ return df1, df2
38
+
39
+
40
+ def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
41
+ for col in df1.columns.intersection(df2.columns):
42
+ s1, s2 = df1[col].replace("", pd.NA), df2[col].replace("", pd.NA)
43
+ try:
44
+ s1_num = pd.to_numeric(s1, errors="coerce")
45
+ s2_num = pd.to_numeric(s2, errors="coerce")
46
+ if not s1_num.isna().all() or not s2_num.isna().all():
47
+ df1[col] = s1_num.astype("float64")
48
+ df2[col] = s2_num.astype("float64")
49
+ continue
50
+ except Exception:
51
+ pass
52
+ df1[col], df2[col] = s1, s2
53
+ return df1, df2
54
+
55
+
56
+ def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8) -> dict:
57
+ results: dict[str, Any] = {
58
+ "shape_mismatch": False,
59
+ "columns_mismatch": False,
60
+ "index_mismatch": False,
61
+ "numeric_diffs": {},
62
+ "non_numeric_diffs": {},
63
+ }
64
+ df2 = fix_r_dataframe_types(df2)
65
+ df1 = fix_string_nans(df1)
66
+ df2 = fix_string_nans(df2)
67
+ df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
68
+ df1, df2 = align_numeric_dtypes(df1, df2)
69
+ if df1.shape != df2.shape:
70
+ results["shape_mismatch"] = True
71
+ print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")
72
+ if set(df1.columns) != set(df2.columns):
73
+ results["columns_mismatch"] = True
74
+ print("[Warning] Column mismatch:")
75
+ print(f" df1: {df1.columns}")
76
+ print(f" df2: {df2.columns}")
77
+ common_cols = df1.columns.intersection(df2.columns)
78
+ else:
79
+ common_cols = df1.columns
80
+ df1_aligned, df2_aligned = df1.loc[:, common_cols], df2.loc[:, common_cols]
81
+ for col in common_cols:
82
+ col_py, col_r = df1_aligned[col], df2_aligned[col]
83
+ if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
84
+ col_py, col_r = col_py.align(col_r)
85
+ close = np.isclose(
86
+ col_py.fillna(np.nan),
87
+ col_r.fillna(np.nan),
88
+ atol=float_tol,
89
+ equal_nan=True,
90
+ )
91
+ if not close.all():
92
+ results["numeric_diffs"][col] = pd.DataFrame(
93
+ {"df1": col_py[~close], "df2": col_r[~close]}
94
+ )
95
+ else:
96
+ unequal = ~col_py.eq(col_r)
97
+ both_na = col_py.isna() & col_r.isna()
98
+ unequal = unequal & ~both_na
99
+ if unequal.any():
100
+ results["non_numeric_diffs"][col] = pd.DataFrame(
101
+ {"df1": col_py[unequal], "df2": col_r[unequal]}
102
+ )
103
+ return results
104
+
105
+
106
+ __all__ = ["normalize_dtypes", "align_numeric_dtypes", "compare_r_py_dataframes"]