rpy-bridge 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rpy_bridge/__init__.py +4 -28
- rpy_bridge/compare.py +106 -0
- rpy_bridge/convert.py +63 -0
- rpy_bridge/core.py +505 -0
- rpy_bridge/dataframe.py +74 -0
- rpy_bridge/env.py +108 -0
- rpy_bridge/logging.py +50 -0
- rpy_bridge/renv.py +149 -0
- rpy_bridge/rpy2_loader.py +71 -0
- rpy_bridge-0.5.0.dist-info/METADATA +297 -0
- rpy_bridge-0.5.0.dist-info/RECORD +15 -0
- rpy_bridge/rpy2_utils.py +0 -1221
- rpy_bridge-0.4.0.dist-info/METADATA +0 -258
- rpy_bridge-0.4.0.dist-info/RECORD +0 -8
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/WHEEL +0 -0
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/top_level.txt +0 -0
rpy_bridge/core.py
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core orchestration for rpy-bridge.
|
|
3
|
+
|
|
4
|
+
`RFunctionCaller` is the primary public interface for loading R scripts,
|
|
5
|
+
activating renv, and calling R functions with automatic conversion.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import warnings
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from .convert import clean_r_missing, r_namedlist_to_dict
|
|
19
|
+
from .dataframe import postprocess_r_dataframe
|
|
20
|
+
from .env import CI_TESTING, R_HOME
|
|
21
|
+
from .logging import log_r_call, logger
|
|
22
|
+
from .renv import activate_renv, find_project_root, normalize_scripts
|
|
23
|
+
from .rpy2_loader import ensure_rpy2
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NamespaceWrapper:
    """
    Wrap an R script namespace for Python attribute access.

    The wrapped object is a mapping from R object names to the objects
    themselves; ``wrapper.some_name`` returns ``mapping["some_name"]``.
    """

    def __init__(self, env):
        """Store *env*, the name -> object mapping exposed through attribute access."""
        self._env = env

    def __getattr__(self, func_name):
        """
        Return the namespace entry called *func_name*.

        Raises:
            AttributeError: If the namespace has no such entry.
        """
        # __getattr__ only runs after normal lookup has failed. Reading the
        # mapping through __dict__ keeps a missing `_env` (which happens when
        # copy/pickle build the instance without calling __init__ and then
        # probe it for special methods) from re-entering __getattr__ forever.
        env = self.__dict__.get("_env")
        if env is not None and func_name in env:
            return env[func_name]
        raise AttributeError(f"Function '{func_name}' not found in R namespace")

    def list_functions(self):
        """Return a list of callable functions in this namespace."""
        return [k for k, v in self._env.items() if callable(v)]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class RFunctionCaller:
    """
    Primary interface for calling R functions from Python.

    `RFunctionCaller` loads one or more R scripts into isolated namespaces and
    provides a unified `call()` method for executing functions from scripts,
    installed R packages, or base R.

    Construction only records configuration. rpy2 is loaded, renv is
    activated and the scripts are sourced on the first method that needs R
    (see `_ensure_r_loaded`).
    """

    def __init__(
        self,
        path_to_renv: str | Path | None = None,
        scripts: str | Path | list[str | Path] | None = None,
        packages: str | list[str] | None = None,
        headless: bool = True,
        skip_renv_if_no_r: bool = True,
        **kwargs,
    ):
        """
        Record configuration and validate the script paths.

        Args:
            path_to_renv: Project directory handed to `activate_renv` on first
                use. Stored resolved; ``None`` disables renv activation.
            scripts: R script file(s) and/or directories, passed through
                `normalize_scripts`. Every resulting path must exist.
            packages: One R package name or a list of names; stored as a new
                list in ``self.packages``.
            headless: When true, non-interactive R environment defaults are
                set before rpy2 is loaded.
            skip_renv_if_no_r: In CI/testing runs where R_HOME was not
                detected, skip renv activation instead of raising.
            **kwargs: Only the deprecated ``script_path`` alias of ``scripts``
                is accepted.

        Raises:
            FileNotFoundError: If a script path does not exist.
            TypeError: If an unknown keyword argument is supplied.
        """
        if path_to_renv is not None and not isinstance(path_to_renv, Path):
            path_to_renv = Path(path_to_renv)
        self.path_to_renv = path_to_renv.resolve() if path_to_renv else None

        if "script_path" in kwargs:
            script_path_value = kwargs.pop("script_path")
            warnings.warn(
                "'script_path' argument is deprecated. Please use 'scripts' instead (accepts a Path or list of Paths).",
                DeprecationWarning,
                stacklevel=2,
            )
            if scripts is None:
                scripts = script_path_value
            else:
                logger.warning("'script_path' ignored because 'scripts' argument is also provided.")

        normalized_scripts = normalize_scripts(scripts)
        for script_path in normalized_scripts:
            if not script_path.exists():
                raise FileNotFoundError(f"R script path not found: {script_path}")

        if kwargs:
            raise TypeError(
                f"RFunctionCaller.__init__() received unexpected keyword arguments: {list(kwargs.keys())}"
            )

        # Namespace name (script file stem) -> {R function name: R function}.
        self._namespaces: dict[str, Any] = {}
        # Namespace name -> directory used as R's working directory for calls.
        self._namespace_roots: dict[str, Path] = {}
        self.scripts = normalized_scripts

        if packages is None:
            self.packages: list[str] = []
        elif isinstance(packages, str):
            self.packages = [packages]
        else:
            # Copy so later mutation of the caller's sequence cannot change
            # this instance's configuration (also accepts tuples).
            self.packages = list(packages)

        self.headless = headless
        self.skip_renv_if_no_r = skip_renv_if_no_r

        # rpy2 handles; all filled in by _ensure_r_loaded().
        self._r = None
        self.ro = None
        self.robjects = None
        self.pandas2ri = None
        self.localconverter = None
        self.IntVector = None
        self.FloatVector = None
        self.BoolVector = None
        self.StrVector = None
        self.ListVector = None
        self.NamedList = None

        self._renv_activated = False
        self._packages_loaded = False
        # One flag per entry of self.scripts.
        self._scripts_loaded = [False] * len(self.scripts)

    @staticmethod
    def _r_quote(text: str) -> str:
        """Return *text* as a double-quoted R string literal, escaping backslashes and quotes."""
        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _should_activate_renv(self) -> bool:
        """
        Decide whether renv activation should be attempted.

        Returns:
            ``False`` when no renv path is configured, when the
            ``RPY_BRIDGE_SKIP_RENV`` environment variable is ``1``/``true``/
            ``TRUE``, or when running in CI without R and
            ``skip_renv_if_no_r`` is set; ``True`` otherwise.

        Raises:
            RuntimeError: If R_HOME was not detected and skipping is not allowed.
        """
        if not self.path_to_renv:
            return False
        if os.environ.get("RPY_BRIDGE_SKIP_RENV") in {"1", "true", "TRUE"}:
            logger.info("[rpy-bridge] Skipping renv activation: RPY_BRIDGE_SKIP_RENV set")
            return False
        if CI_TESTING and R_HOME is None and self.skip_renv_if_no_r:
            logger.info("[rpy-bridge] Skipping renv activation in CI: R_HOME not detected")
            return False
        if R_HOME is None:
            raise RuntimeError(
                "R_HOME not detected; cannot activate renv. Install R or set R_HOME."
            )
        return True

    def _ensure_headless_env(self) -> None:
        """Set non-interactive R environment defaults when ``headless`` is on; existing values are kept."""
        if not self.headless:
            return
        defaults = {
            "R_DEFAULT_DEVICE": "png",
            "R_INTERACTIVE": "false",
            "R_GUI_APP_VERSION": "0",
            "RSTUDIO": "0",
        }
        for key, val in defaults.items():
            os.environ.setdefault(key, val)

    def _ensure_r_loaded(self) -> None:
        """
        Bring the R session into a usable state.

        In order: apply headless environment defaults, load rpy2 handles (once),
        activate renv (once, if configured), set ``this.path`` options, make
        sure ``withr`` is loaded, then source every script that has not been
        loaded yet into its own R environment and register its functions under
        the script's file stem.

        Raises:
            RuntimeError: If renv activation fails.
            ValueError: If a script entry is neither a file nor a directory.
        """
        self._ensure_headless_env()

        if self.robjects is None:
            rpy2_dict = ensure_rpy2()
            self._RPY2 = rpy2_dict
            self._r = rpy2_dict["ro"]
            self.ro = rpy2_dict["robjects"]
            self.robjects = rpy2_dict["robjects"]
            self.pandas2ri = rpy2_dict["pandas2ri"]
            self.localconverter = rpy2_dict["localconverter"]
            self.IntVector = rpy2_dict["IntVector"]
            self.FloatVector = rpy2_dict["FloatVector"]
            self.BoolVector = rpy2_dict["BoolVector"]
            self.StrVector = rpy2_dict["StrVector"]
            self.ListVector = rpy2_dict["ListVector"]
            self.NamedList = rpy2_dict["NamedList"]

        if not self._renv_activated and self._should_activate_renv():
            try:
                activate_renv(self.path_to_renv)
                self._renv_activated = True
                logger.info(
                    f"[rpy-bridge.RFunctionCaller] renv activated for project: {self.path_to_renv}"
                )
            except Exception as exc:
                raise RuntimeError(
                    f"Failed to activate renv at {self.path_to_renv}: {exc}"
                ) from exc

        r = self.robjects.r
        # Best effort only: the this.path tweaks are optional, so a failure
        # here must not prevent R from being used.
        try:
            r('options(this.path.gui = "httpd")')
            r("options(this.path.verbose = FALSE)")
            r(
                """
                if (requireNamespace("this.path", quietly = TRUE)) {
                    try({
                        assignInNamespace(".gui_path", function(...) "httpd", ns = "this.path")
                    }, silent = TRUE)
                }
                """
            )
        except Exception:
            pass

        # withr::with_dir is used below to source each script from its root.
        self.ensure_r_package("withr")

        if not hasattr(self, "_namespaces"):
            self._namespaces = {}

        for idx, script_entry in enumerate(self.scripts):
            if self._scripts_loaded[idx]:
                continue

            script_entry = script_entry.resolve()

            if script_entry.is_file():
                r_files = [script_entry]
            elif script_entry.is_dir():
                r_files = sorted(script_entry.glob("*.R"))
                if not r_files:
                    logger.warning(f"No .R files found in directory: {script_entry}")
                    self._scripts_loaded[idx] = True
                    continue
            else:
                raise ValueError(f"Invalid script path: {script_entry}")

            for script_path in r_files:
                ns_name = script_path.stem
                logger.opt(depth=2).info(
                    "[rpy-bridge.RFunctionCaller] Loading R script '{}' as namespace '{}'",
                    script_path.name,
                    ns_name,
                )
                if ns_name in self._namespaces:
                    # Namespaces are keyed by file stem, so two scripts with
                    # the same stem share a key and the later one wins.
                    logger.warning(
                        f"[rpy-bridge.RFunctionCaller] Namespace '{ns_name}' is already registered; "
                        f"it will be replaced by functions from {script_path}"
                    )

                r("env <- new.env(parent=globalenv())")
                # Paths are escaped before being spliced into R source so
                # that quotes or backslashes in a path cannot break the code.
                r(f"script_path <- {self._r_quote(script_path.as_posix())}")

                script_root = find_project_root(self.path_to_renv, [script_path])
                if script_root is None:
                    script_root = script_path.parent.resolve()
                script_root_arg = self._r_quote(script_root.as_posix())

                r(
                    f"""
                    withr::with_dir(
                        {script_root_arg},
                        sys.source(script_path, envir=env, chdir = TRUE)
                    )
                    """
                )

                env_obj = r("env")
                self._namespaces[ns_name] = {
                    name: env_obj[name] for name in env_obj.keys() if callable(env_obj[name])
                }
                self._namespace_roots[ns_name] = script_root

                logger.info(
                    f"[rpy-bridge.RFunctionCaller] Registered {len(self._namespaces[ns_name])} functions in namespace '{ns_name}'"
                )

            self._scripts_loaded[idx] = True

    def __getattr__(self, name: str):
        """
        Expose a loaded script namespace as an attribute.

        Returns:
            A `NamespaceWrapper` around the namespace called *name*.

        Raises:
            AttributeError: If no such namespace is registered.
        """
        # Checked through __dict__ so lookups made before __init__ has run
        # do not recurse into this method.
        if "_namespaces" in self.__dict__ and name in self._namespaces:
            ns_env = self._namespaces[name]
            return NamespaceWrapper(ns_env)
        raise AttributeError(f"'RFunctionCaller' object has no attribute '{name}'")

    def _clean_scalar(self, x):
        """Map ``None``, the rpy2 NA sentinels and float NaN to ``None``; return anything else unchanged."""
        robjects = self.robjects

        if x is None:
            return None
        if x in (
            getattr(robjects, "NA_Real", None),
            getattr(robjects, "NA_Integer", None),
            getattr(robjects, "NA_Logical", None),
        ):
            return None
        if x is getattr(robjects, "NA_Character", None):
            return None
        if isinstance(x, float) and np.isnan(x):
            return None
        return x

    def list_namespaces(self) -> list[str]:
        """Return the names of all loaded script namespaces."""
        self._ensure_r_loaded()
        return list(self._namespaces.keys())

    def list_namespace_functions(self, namespace: str) -> list[str]:
        """
        Return the callable names registered in *namespace*.

        Raises:
            ValueError: If *namespace* is not a loaded script namespace.
        """
        self._ensure_r_loaded()
        if namespace not in self._namespaces:
            raise ValueError(f"Namespace '{namespace}' not found")
        return [k for k, v in self._namespaces[namespace].items() if callable(v)]

    def _get_package_functions(self, pkg: str) -> list[str]:
        """
        Return the names of the functions found in R package *pkg*.

        Names come from ``ls("package:<pkg>")``, which R only resolves for
        packages attached to the search path. Any failure is logged and
        yields an empty list.
        """
        r = self.robjects.r
        pkg_literal = self._r_quote(pkg)
        try:
            # The R code has to be evaluated by calling `r(...)`; subscripting
            # `r[...]` looks up an R object by name and cannot run an expression.
            all_objs = list(r(f"ls({self._r_quote('package:' + pkg)})"))
            funcs = [
                name
                for name in all_objs
                if r(f"is.function(get({self._r_quote(name)}, envir=asNamespace({pkg_literal})))")[0]
            ]
            return funcs
        except Exception:
            logger.warning(f"Failed to list functions for package '{pkg}'")
            return []

    def list_all_functions(self, include_packages: bool = False) -> dict[str, list[str]]:
        """
        Map every namespace name to the function names it provides.

        Args:
            include_packages: Also add one entry per R namespace reported by
                ``loadedNamespaces()``. Packages whose functions cannot be
                listed get a single placeholder string instead.
        """
        self._ensure_r_loaded()
        all_funcs = {}

        for ns_name, ns_env in self._namespaces.items():
            funcs = [name for name, val in ns_env.items() if callable(val)]
            all_funcs[ns_name] = funcs

        if include_packages:
            r = self.robjects.r
            try:
                pkgs = r("loadedNamespaces()")
                for pkg in pkgs:
                    funcs = self._get_package_functions(pkg)
                    if not funcs:
                        funcs = [
                            "[See official documentation for functions, datasets, and objects]"
                        ]
                    all_funcs[pkg] = funcs
            except Exception as exc:
                # Still best effort (script namespaces are returned either
                # way), but the failure is no longer invisible.
                logger.warning(
                    f"[rpy-bridge.RFunctionCaller] Failed to enumerate loaded R packages: {exc}"
                )

        return all_funcs

    def print_function_tree(self, include_packages: bool = False, max_display: int = 10):
        """
        Print each namespace followed by up to *max_display* of its function names, sorted.

        Namespaces without functions are omitted; an ellipsis line marks a
        truncated listing.
        """
        all_funcs = self.list_all_functions(include_packages=include_packages)

        for ns_name, funcs in all_funcs.items():
            if not funcs:
                continue
            print(f"{ns_name}/")
            for func in sorted(funcs)[:max_display]:
                print(f" {func}")
            if len(funcs) > max_display:
                print(" ...")

    def _py2r(self, obj):
        """
        Convert a Python value into something rpy2 can pass to an R function.

        Conversion rules, in order:
            * rpy2 vectors and data frames are returned unchanged;
            * ``None``, ``pd.NA`` and float NaN become R ``NULL``;
            * ``pd.DataFrame`` goes through ``pandas2ri.py2rpy``;
            * ``pd.Series`` is converted as its ``tolist()``;
            * ``int``/``float``/``bool``/``str`` scalars are returned unchanged;
            * NumPy scalars are converted as their ``.item()`` value;
            * lists and tuples become a Float/Bool/Str vector when all
              non-missing items share that kind (missing items become the
              matching NA), otherwise a ListVector named "0", "1", ...;
            * dicts become a ListVector with converted values.

        Raises:
            NotImplementedError: For any other type.
        """
        # R is prepared once here; nested values are handled by `convert`
        # below so the R session setup is not repeated per element.
        self._ensure_r_loaded()
        robjects = self.robjects
        pandas2ri = self.pandas2ri
        FloatVector = self.FloatVector
        BoolVector = self.BoolVector
        StrVector = self.StrVector
        ListVector = self.ListVector
        localconverter = self.localconverter

        r_types = (
            robjects.vectors.IntVector,
            robjects.vectors.FloatVector,
            robjects.vectors.BoolVector,
            robjects.vectors.StrVector,
            robjects.vectors.ListVector,
            robjects.DataFrame,
        )

        def is_na(val):
            return val is None or val is pd.NA or (isinstance(val, float) and pd.isna(val))

        def convert(value):
            if isinstance(value, r_types):
                return value
            if is_na(value):
                return robjects.NULL
            if isinstance(value, pd.DataFrame):
                return pandas2ri.py2rpy(value)
            if isinstance(value, pd.Series):
                return convert(value.tolist())
            if isinstance(value, (int, float, bool, str)):
                return value
            if isinstance(value, np.generic):
                native = value.item()
                # Guard against NumPy scalar types whose item() is still a
                # NumPy scalar, which would otherwise loop forever.
                if isinstance(native, np.generic):
                    raise NotImplementedError(f"Cannot convert Python object to R: {type(value)}")
                return convert(native)
            if isinstance(value, tuple):
                value = list(value)
            if isinstance(value, list):
                if len(value) == 0:
                    return FloatVector([])

                types = {type(x) for x in value if not is_na(x)}
                if types <= {int, float}:
                    return FloatVector([robjects.NA_Real if is_na(x) else float(x) for x in value])
                if types <= {bool}:
                    return BoolVector([robjects.NA_Logical if is_na(x) else x for x in value])
                if types <= {str}:
                    return StrVector([robjects.NA_Character if is_na(x) else x for x in value])
                return ListVector({str(i): convert(v) for i, v in enumerate(value)})
            if isinstance(value, dict):
                return ListVector({k: convert(v) for k, v in value.items()})
            raise NotImplementedError(f"Cannot convert Python object to R: {type(value)}")

        with localconverter(robjects.default_converter + pandas2ri.converter):
            return convert(obj)

    def _r2py(self, obj, top_level: bool = True):
        """
        Convert an rpy2 result into plain Python / pandas objects.

        Args:
            obj: Value returned by an R call.
            top_level: When true, a result that converts to a one-element
                list is unwrapped to that element.

        Returns:
            ``None`` for R ``NULL``; a post-processed ``pd.DataFrame`` for R
            data frames; the result of `r_namedlist_to_dict` for named lists
            and list vectors; a list (or single value) with NA mapped to
            ``None`` for atomic vectors; otherwise *obj* after `_clean_scalar`.
        """
        robjects = self.robjects
        NamedList = self.NamedList
        ListVector = self.ListVector
        StrVector = self.StrVector
        IntVector = self.IntVector
        FloatVector = self.FloatVector
        BoolVector = self.BoolVector
        NULLType = self._RPY2["NULLType"]
        localconverter = self.localconverter
        pandas2ri = self.pandas2ri

        if isinstance(obj, NULLType):
            return None

        if isinstance(obj, robjects.DataFrame):
            with localconverter(robjects.default_converter + pandas2ri.converter):
                df = robjects.conversion.rpy2py(obj)
            df = postprocess_r_dataframe(df)
            return clean_r_missing(df, caller=self)

        if isinstance(obj, (NamedList, ListVector)):
            py_obj = r_namedlist_to_dict(obj, caller=self, top_level=top_level)
            if isinstance(py_obj, list) and len(py_obj) == 1 and top_level:
                return py_obj[0]
            return py_obj

        if isinstance(obj, (StrVector, IntVector, FloatVector, BoolVector)):
            py_list = [self._clean_scalar(v) for v in obj]
            if len(py_list) == 1 and top_level:
                return py_list[0]
            return py_list

        return self._clean_scalar(obj)

    def ensure_r_package(self, pkg: str):
        """
        Load R package *pkg*, installing it from CRAN first if loading fails.

        The install uses ``https://cloud.r-project.org``; if the package still
        cannot be loaded afterwards, the error from R propagates.
        """
        r = self.robjects.r
        load_cmd = f"suppressMessages(library({self._r_quote(pkg)}, character.only=TRUE))"
        try:
            r(load_cmd)
        except Exception:
            logger.info(f"[rpy-bridge.RFunctionCaller] Package '{pkg}' not found.")
            logger.warning(f"[rpy-bridge.RFunctionCaller] Installing missing R package: {pkg}")
            r(f'install.packages({self._r_quote(pkg)}, repos="https://cloud.r-project.org")')
            r(load_cmd)

    def call(self, func_name: str, *args, **kwargs):
        """
        Call an R function and return its result converted to Python.

        Resolution of *func_name*:
            * ``"ns::fn"`` - if ``ns`` is a loaded script namespace, ``fn``
              must be in it; otherwise ``ns::fn`` is evaluated in R (package).
            * ``"fn"`` - first script namespace containing it, then R's
              global environment, then whatever ``r[fn]`` finds.

        Functions from script namespaces run with R's working directory set
        to that namespace's root; the previous directory is restored after.

        Args:
            func_name: Function name, optionally namespace-qualified.
            *args, **kwargs: Arguments, converted with `_py2r`.

        Raises:
            ValueError: If the function cannot be found.
            RuntimeError: If a package function cannot be resolved, or the R
                call itself fails.
        """
        self._ensure_r_loaded()

        func = None
        source_info = None
        namespace_root = None

        if "::" in func_name:
            ns_name, fname = func_name.split("::", 1)
            if ns_name in self._namespaces:
                ns_env = self._namespaces[ns_name]
                if fname in ns_env:
                    func = ns_env[fname]
                    source_info = f"script namespace '{ns_name}'"
                    namespace_root = self._namespace_roots.get(ns_name)
                else:
                    raise ValueError(
                        f"Function '{fname}' not found in R script namespace '{ns_name}'"
                    )
            else:
                try:
                    func = self.robjects.r(f"{ns_name}::{fname}")
                    source_info = f"R package '{ns_name}'"
                except Exception as exc:
                    raise RuntimeError(
                        f"Failed to resolve R function '{func_name}': {exc}"
                    ) from exc

        else:
            for ns_name, ns_env in self._namespaces.items():
                if func_name in ns_env:
                    func = ns_env[func_name]
                    source_info = f"script namespace '{ns_name}'"
                    namespace_root = self._namespace_roots.get(ns_name)
                    break

            if func is None:
                try:
                    func = self.robjects.globalenv[func_name]
                    source_info = "global environment"
                except KeyError:
                    pass

            if func is None:
                try:
                    func = self.robjects.r[func_name]
                    source_info = "base R / loaded package"
                except KeyError:
                    raise ValueError(
                        f"R function '{func_name}' not found in any namespace, global env, or base R."
                    )

        r_args = [self._py2r(a) for a in args]
        r_kwargs = {k: self._py2r(v) for k, v in kwargs.items()}

        try:
            # namespace_root is only ever set for script-namespace functions.
            if namespace_root:
                # R's setwd() returns the previous working directory, so the
                # path travels as an argument (no quoting issues) and no
                # helper variable is left behind in R's global environment.
                setwd = self.robjects.r["setwd"]
                previous_wd = setwd(namespace_root.as_posix())
                try:
                    result = func(*r_args, **r_kwargs)
                finally:
                    try:
                        setwd(previous_wd)
                    except Exception as restore_exc:
                        logger.warning(
                            f"[rpy-bridge.RFunctionCaller] Could not restore R working directory: {restore_exc}"
                        )
            else:
                result = func(*r_args, **r_kwargs)
        except Exception as exc:
            raise RuntimeError(
                f"Error calling R function '{func_name}' from {source_info}: {exc}"
            ) from exc

        log_r_call(func_name, source_info)

        return self._r2py(result)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
__all__ = ["RFunctionCaller", "NamespaceWrapper"]
|
rpy_bridge/dataframe.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DataFrame cleaning and post-processing utilities for R ↔ Python workflows.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
    """
    Remove the ``.groups`` and ``.rows`` entries from ``r_df.attrs``.

    The frame is modified in place and also returned. Entries that are absent
    (or a frame without usable ``attrs``) are ignored.
    """
    leftover_keys = (".groups", ".rows")
    for key in leftover_keys:
        try:
            del r_df.attrs[key]
        except (KeyError, AttributeError):
            continue
    return r_df
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* in which the cells "nan", "NaN", "NA", "na" and "" are replaced by ``pd.NA``."""
    missing_markers = ["nan", "NaN", "NA", "na", ""]
    return df.replace(missing_markers, pd.NA)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def normalize_single_df_dtypes(df: pd.DataFrame, numeric_threshold: float = 0.5) -> pd.DataFrame:
    """
    Normalize missing-value markers and coerce mostly-numeric object columns.

    Steps:
        1. Replace the cells "", "nan", "NaN", "NA" and "na" with ``pd.NA``
           (this produces a new frame; the argument is not modified).
        2. For every object-dtype column, parse it with ``pd.to_numeric``.
           The parsed version replaces the column when the number of values
           that parsed is at least ``numeric_threshold`` times the number of
           non-missing values. Values that did not parse become NaN in a
           coerced column, so they are lost.
        3. Integer-dtype columns that contain missing values are cast to
           ``float64``.

    Args:
        df: Frame to normalize.
        numeric_threshold: Minimum fraction of non-missing values that must be
            numeric for a column to be coerced. The default ``0.5`` keeps the
            historical behavior; ``1.0`` only coerces columns in which every
            non-missing value is numeric.

    Returns:
        The normalized frame.
    """
    df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_object_dtype(series):
            coerced = pd.to_numeric(series, errors="coerce")
            if coerced.notna().sum() >= series.notna().sum() * numeric_threshold:
                df[col] = coerced
        if pd.api.types.is_integer_dtype(df[col]) and df[col].isna().any():
            df[col] = df[col].astype("float64")
    return df
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
    """
    Repair column types of a data frame converted from R, in place.

    For each column:
        * integer columns have the value -2147483648 (the bit pattern R uses
          for integer NA) masked to ``pd.NA``;
        * numeric columns whose non-missing values all lie in
          [10000, 40000] are treated as day counts since 1970-01-01 and
          converted to datetimes - note this is a heuristic and also matches
          ordinary numbers in that range;
        * timezone-aware datetime columns are made timezone-naive.

    All three checks look at the column as it was on entry to the iteration.

    Returns:
        The same frame object that was passed in.
    """
    r_na_integer = -2147483648
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_integer_dtype(series):
            df[col] = series.mask(series == r_na_integer, pd.NA)
        if pd.api.types.is_numeric_dtype(series):
            values = series.dropna()
            if not values.empty and values.between(10000, 40000).all():
                # Conversion is best effort; on failure the column is left as is.
                try:
                    df[col] = pd.to_datetime("1970-01-01") + pd.to_timedelta(series, unit="D")
                except Exception:
                    pass
        # pd.api.types.is_datetime64tz_dtype is deprecated (pandas 2.1) and
        # warns on every call; the dtype check below is its documented
        # replacement.
        if isinstance(series.dtype, pd.DatetimeTZDtype):
            df[col] = series.dt.tz_localize(None)
    return df
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run the standard clean-up pipeline on a data frame converted from R.

    Applies `fix_r_dataframe_types`, `fix_string_nans` and
    `normalize_single_df_dtypes` in that order. Afterwards, an object-dtype
    index whose labels are exactly 1..n is replaced by a 0-based
    ``RangeIndex``; any failure while checking the index leaves it untouched.
    """
    pipeline = (fix_r_dataframe_types, fix_string_nans, normalize_single_df_dtypes)
    for step in pipeline:
        df = step(df)

    if df.index.dtype != object:
        return df

    try:
        numeric_labels = df.index.astype(int)
        one_based = np.arange(len(df)) + 1
        if (numeric_labels == one_based).all():
            df.index = pd.RangeIndex(start=0, stop=len(df))
    except Exception:
        pass
    return df
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
__all__ = [
|
|
69
|
+
"clean_r_dataframe",
|
|
70
|
+
"fix_string_nans",
|
|
71
|
+
"normalize_single_df_dtypes",
|
|
72
|
+
"fix_r_dataframe_types",
|
|
73
|
+
"postprocess_r_dataframe",
|
|
74
|
+
]
|
rpy_bridge/env.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Environment discovery for R and rpy2.
|
|
3
|
+
|
|
4
|
+
Responsible for:
|
|
5
|
+
- warning suppression for benign R env messages
|
|
6
|
+
- detecting the R installation and exporting R_HOME
|
|
7
|
+
- platform-specific library path adjustments
|
|
8
|
+
- CI/testing skip knobs
|
|
9
|
+
- availability check for rpy2
|
|
10
|
+
|
|
11
|
+
Importing this module performs the same side effects as the original
|
|
12
|
+
rpy2_utils import-time block to avoid behavior changes.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import importlib.util
|
|
18
|
+
import os
|
|
19
|
+
import subprocess
|
|
20
|
+
import sys
|
|
21
|
+
import warnings
|
|
22
|
+
|
|
23
|
+
from .logging import logger
|
|
24
|
+
|
|
25
|
+
warnings.filterwarnings("ignore", message="Environment variable .* redefined by R")
|
|
26
|
+
|
|
27
|
+
# Determine if we're running in CI / testing
|
|
28
|
+
CI_TESTING = os.environ.get("GITHUB_ACTIONS") == "true" or os.environ.get("TESTING") == "1"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def ensure_rpy2_available() -> None:
    """
    Check that the ``rpy2`` package can be imported.

    Raises:
        RuntimeError: If no import spec for ``rpy2`` is found; the message
            contains installation instructions.
    """
    if importlib.util.find_spec("rpy2") is not None:
        return

    install_help = (
        "\n[Error] rpy2 is not installed. Please install it in your Python environment:\n"
        " pip install rpy2\n\n"
        "Make sure your Python environment can access your system R installation.\n"
        "On macOS with Homebrew: brew install r\n"
        "On Linux: apt install r-base (Debian/Ubuntu) or yum install R (CentOS/RHEL)\n"
        "On Windows: install R from https://cran.r-project.org\n"
    )
    raise RuntimeError(install_help)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def find_r_home() -> str | None:
    """
    Detect system R installation.

    First asks the ``R`` executable on PATH for ``R.home()``. If R cannot be
    started, exits with an error, or prints nothing, a list of well-known
    install locations is probed instead.

    Returns:
        The R home directory, or ``None`` if nothing was found.
    """
    import glob

    try:
        r_home = subprocess.check_output(
            ["R", "--vanilla", "--slave", "-e", "cat(R.home())"],
            stderr=subprocess.PIPE,
            text=True,
        ).strip()
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable binary;
        # CalledProcessError covers an R that starts but exits non-zero.
        # This function runs at import time, so neither should escape.
        r_home = ""
    else:
        # Drop a trailing prompt character if one made it into the output.
        if r_home.endswith(">"):
            r_home = r_home[:-1].strip()

    if r_home:
        return r_home

    candidate_patterns = [
        "/usr/lib/R",
        "/usr/local/lib/R",
        "/opt/homebrew/Cellar/r/4.5.2/lib/R",
        "C:\\Program Files\\R\\R-4.5.2",
        # Same locations for any other installed version.
        "/opt/homebrew/Cellar/r/*/lib/R",
        "C:\\Program Files\\R\\R-*",
    ]
    for pattern in candidate_patterns:
        # Reverse lexicographic order so higher version names are tried first.
        matches = sorted(glob.glob(pattern), reverse=True)
        if matches:
            return matches[0]
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _prepend_path_entry(var_name: str, entry: str, sep: str) -> None:
    """
    Put *entry* at the front of the *sep*-separated environment variable *var_name*.

    Nothing changes when *entry* is already one of the listed entries. When
    the variable is unset or empty, it is set to *entry* alone, so no empty
    element (a dangling separator) is created.
    """
    current = os.environ.get(var_name, "")
    if entry in current.split(sep):
        return
    os.environ[var_name] = f"{entry}{sep}{current}" if current else entry


# Resolve R_HOME once at import time: an existing environment value wins,
# otherwise the installation is auto-detected.
R_HOME = os.environ.get("R_HOME")
if not R_HOME:
    R_HOME = find_r_home()
if not R_HOME:
    if CI_TESTING:
        logger.warning("R not found; skipping all R-dependent setup in CI/testing environment.")
        R_HOME = None
    else:
        raise RuntimeError("R not found. Please install R or add it to PATH.")
else:
    os.environ["R_HOME"] = R_HOME

logger.info(
    f"[rpy-bridge] R_HOME = {R_HOME if R_HOME else 'not detected; R-dependent code skipped'}"
)

# Only configure platform-specific library paths if R is available
if R_HOME:
    if sys.platform == "darwin":
        _prepend_path_entry("DYLD_FALLBACK_LIBRARY_PATH", os.path.join(R_HOME, "lib"), ":")

    elif sys.platform.startswith("linux"):
        _prepend_path_entry("LD_LIBRARY_PATH", os.path.join(R_HOME, "lib"), ":")

    elif sys.platform.startswith("win"):
        _prepend_path_entry("PATH", os.path.join(R_HOME, "bin", "x64"), os.pathsep)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
__all__ = ["CI_TESTING", "R_HOME", "ensure_rpy2_available", "find_r_home"]
|