rpy-bridge 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rpy_bridge/__init__.py CHANGED
@@ -6,6 +6,7 @@ continue importing directly from ``rpy_bridge``.
6
6
  """
7
7
 
8
8
  from .rpy2_utils import (
9
+ NamespaceWrapper,
9
10
  RFunctionCaller,
10
11
  activate_renv,
11
12
  align_numeric_dtypes,
@@ -23,6 +24,7 @@ from .rpy2_utils import (
23
24
  __all__ = [
24
25
  "activate_renv",
25
26
  "RFunctionCaller",
27
+ "NamespaceWrapper",
26
28
  "r_namedlist_to_dict",
27
29
  "clean_r_dataframe",
28
30
  "fix_string_nans",
rpy_bridge/rpy2_utils.py CHANGED
@@ -1,10 +1,23 @@
1
1
  """
2
- Wrapper for calling R functions from Python using rpy2.
2
+ RPython Integration Utility
3
3
 
4
+ Provides tools to load R scripts, activate renv environments, and call R functions
5
+ directly from Python, with automatic conversion between R and Python data types.
6
+
7
+ ----------
8
+ Requirements
4
9
  ----------
5
- ** R must be installed and accessible in your environment **
6
- Ensure compatibility with your R project's renv setup (or other virtual env/base env if that's what you're using).
10
+ - R must be installed and accessible in your system environment.
11
+ - Ensure compatibility with your R project's renv setup (or any other R environment you use).
12
+
13
+ Features
7
14
  ----------
15
+ - Lazy loading of rpy2 and R runtime.
16
+ - Activation of renv environments for isolated R project dependencies.
17
+ - Support for sourcing individual R scripts or directories of scripts.
18
+ - Namespace-based access to R functions.
19
+ - Automatic conversion between R vectors, data frames, and Python types (pandas, lists, scalars).
20
+ - Utilities for cleaning and aligning data frames between R and Python.
8
21
  """
9
22
 
10
23
  # ruff: noqa: E402
@@ -16,7 +29,7 @@ import subprocess
16
29
  import sys
17
30
  import warnings
18
31
  from pathlib import Path
19
- from typing import TYPE_CHECKING, Any, Union
32
+ from typing import TYPE_CHECKING, Any, Iterable, Union
20
33
 
21
34
  import numpy as np
22
35
  import pandas as pd
@@ -29,7 +42,8 @@ if TYPE_CHECKING:
29
42
 
30
43
  from loguru import Logger as LoguruLogger
31
44
 
32
- LoggerType = LoggerType = Union[LoguruLogger, logging_module.Logger]
45
+ LoggerType = Union[LoguruLogger, logging_module.Logger]
46
+
33
47
  else:
34
48
  LoggerType = None # runtime doesn’t need the type object
35
49
 
@@ -44,6 +58,47 @@ except ImportError:
44
58
  logger = logging.getLogger("rpy-bridge")
45
59
 
46
60
 
61
+ # --- Remove default handler to override global default ---
62
+ logger.remove()
63
+
64
+ # --- Add a "sink" for RFunctionCaller logs ---
65
+ _rfc_logger = logger.bind(tag="[RFunctionCaller]")
66
+ _rfc_logger.add(
67
+ sys.stderr,
68
+ format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}", # Only show message
69
+ level="INFO",
70
+ )
71
+
72
+
73
def _log_r_call(func_name: str, source_info: str):
    """
    Emit an INFO record announcing that an R function was called.

    The message has the form
    ``[rpy-bridge.RFunctionCaller] Called R function '<func_name>' from <source_info>``.
    """
    # NOTE(review): with loguru, depth=1 should attribute the record to the
    # caller of this helper rather than to the helper itself -- confirm.
    call_logger = _rfc_logger.opt(depth=1, record=False)
    call_logger.info(
        "[rpy-bridge.RFunctionCaller] Called R function '{}' from {}",
        func_name,
        source_info,
    )
82
+
83
+
84
+ # ---------------------------------------------------------------------
85
+ # Path resolution
86
+ # ---------------------------------------------------------------------
87
def _normalize_scripts(
    scripts: Union[str, Path, Iterable[Union[str, Path]], None],
) -> list[Path]:
    """
    Normalize the ``scripts`` argument into a list of resolved paths.

    Parameters
    ----------
    scripts : str, Path, iterable of str/Path, or None
        A single script path, several of them, or ``None``.

    Returns
    -------
    list[Path]
        One ``Path(...).resolve()`` result per input path, in input order;
        an empty list when ``scripts`` is ``None``.

    Raises
    ------
    TypeError
        If ``scripts`` is not iterable, or one of its elements cannot be
        turned into a ``Path``. The underlying error is attached as the
        exception's ``__cause__``.
    """
    if scripts is None:
        return []
    # A lone str/Path must be wrapped explicitly; otherwise a str would be
    # iterated character by character below.
    if isinstance(scripts, (str, Path)):
        return [Path(scripts).resolve()]
    try:
        return [Path(entry).resolve() for entry in scripts]
    except TypeError as exc:
        # Chain the original error so the offending value stays visible in
        # the traceback instead of being reported as an unrelated failure.
        raise TypeError(
            f"Invalid type for 'scripts': {type(scripts)}. Must be str, Path, or list/iterable thereof."
        ) from exc
100
+
101
+
47
102
  # ---------------------------------------------------------------------
48
103
  # R detection and rpy2 installation
49
104
  # ---------------------------------------------------------------------
@@ -64,7 +119,9 @@ def ensure_rpy2_available() -> None:
64
119
 
65
120
 
66
121
  def find_r_home() -> str | None:
67
- """Detect system R installation."""
122
+ """
123
+ Detect system R installation.
124
+ """
68
125
  try:
69
126
  r_home = subprocess.check_output(
70
127
  ["R", "--vanilla", "--slave", "-e", "cat(R.home())"],
@@ -88,26 +145,50 @@ def find_r_home() -> str | None:
88
145
  return None
89
146
 
90
147
 
91
- R_HOME = find_r_home()
148
+ # Determine if we're running in CI / testing
149
+ CI_TESTING = (
150
+ os.environ.get("GITHUB_ACTIONS") == "true" or os.environ.get("TESTING") == "1"
151
+ )
152
+
153
+ R_HOME = os.environ.get("R_HOME")
92
154
  if not R_HOME:
93
- raise RuntimeError("R not found. Please install R or add it to PATH.")
94
-
95
- logger.info(f"R_HOME = {R_HOME}")
96
- os.environ["R_HOME"] = R_HOME
97
- ensure_rpy2_available()
98
-
99
- # macOS dynamic library path
100
- if sys.platform == "darwin":
101
- lib_path = os.path.join(R_HOME, "lib")
102
- if lib_path not in os.environ.get("DYLD_FALLBACK_LIBRARY_PATH", ""):
103
- os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = (
104
- f"{lib_path}:{os.environ.get('DYLD_FALLBACK_LIBRARY_PATH','')}"
105
- )
155
+ R_HOME = find_r_home()
156
+ if not R_HOME:
157
+ if CI_TESTING:
158
+ logger.warning(
159
+ "R not found; skipping all R-dependent setup in CI/testing environment."
160
+ )
161
+ R_HOME = None # Explicitly None to signal "no R available"
162
+ else:
163
+ raise RuntimeError("R not found. Please install R or add it to PATH.")
164
+ else:
165
+ os.environ["R_HOME"] = R_HOME
166
+
167
+ logger.info(
168
+ f"[rpy-bridge] R_HOME = {R_HOME if R_HOME else 'not detected; R-dependent code skipped'}"
169
+ )
170
+
171
+ # Only configure platform-specific library paths if R is available
172
+ if R_HOME:
173
+ if sys.platform == "darwin":
174
+ lib_path = os.path.join(R_HOME, "lib")
175
+ if lib_path not in os.environ.get("DYLD_FALLBACK_LIBRARY_PATH", ""):
176
+ os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = (
177
+ f"{lib_path}:{os.environ.get('DYLD_FALLBACK_LIBRARY_PATH','')}"
178
+ )
179
+
180
+ elif sys.platform.startswith("linux"):
181
+ lib_path = os.path.join(R_HOME, "lib")
182
+ ld_path = os.environ.get("LD_LIBRARY_PATH", "")
183
+ if lib_path not in ld_path.split(":"):
184
+ os.environ["LD_LIBRARY_PATH"] = f"{lib_path}:{ld_path}"
185
+
186
+ elif sys.platform.startswith("win"):
187
+ bin_path = os.path.join(R_HOME, "bin", "x64")
188
+ path_env = os.environ.get("PATH", "")
189
+ if bin_path not in path_env.split(os.pathsep):
190
+ os.environ["PATH"] = f"{bin_path}{os.pathsep}{path_env}"
106
191
 
107
- elif sys.platform.startswith("linux"):
108
- lib_path = os.path.join(R_HOME, "lib")
109
- ld_path = os.environ.get("LD_LIBRARY_PATH", "")
110
- os.environ["LD_LIBRARY_PATH"] = f"{lib_path}:{ld_path}"
111
192
 
112
193
  # ---------------------------------------------------------------------
113
194
  # Lazy rpy2 import machinery
@@ -190,24 +271,47 @@ def activate_renv(path_to_renv: Path) -> None:
190
271
  renviron_file = project_dir / ".Renviron"
191
272
  if renviron_file.is_file():
192
273
  os.environ["R_ENVIRON_USER"] = str(renviron_file)
193
- logger.info(f"R_ENVIRON_USER set to: {renviron_file}")
274
+ logger.info(f"[rpy-bridge] R_ENVIRON_USER set to: {renviron_file}")
194
275
 
195
276
  rprofile_file = project_dir / ".Rprofile"
196
277
  if rprofile_file.is_file():
197
278
  robjects.r(f'source("{rprofile_file.as_posix()}")')
198
- logger.info(f".Rprofile sourced: {rprofile_file}")
279
+ logger.info(f"[rpy-bridge] .Rprofile sourced: {rprofile_file}")
199
280
 
200
281
  try:
201
282
  robjects.r("suppressMessages(library(renv))")
202
283
  except Exception:
203
- logger.info("Installing renv package in project library...")
284
+ logger.info("[rpy-bridge] Installing renv package in project library...")
204
285
  robjects.r(
205
286
  f'install.packages("renv", repos="https://cloud.r-project.org", lib="{renv_dir / "library"}")'
206
287
  )
207
288
  robjects.r("library(renv)")
208
289
 
209
290
  robjects.r(f'renv::load("{project_dir.as_posix()}")')
210
- logger.info(f"renv environment loaded for project: {project_dir}")
291
+ logger.info(f"[rpy-bridge] renv environment loaded for project: {project_dir}")
292
+
293
+
294
+ # ---------------------------------------------------------------------
295
+ # NamespaceWrapper
296
+ # ---------------------------------------------------------------------
297
class NamespaceWrapper:
    """
    Wraps an R script namespace for Python attribute access.

    ``env`` must support ``in``, ``[]`` and ``.items()``; its entries are
    exposed as attributes of the wrapper.
    """

    def __init__(self, env):
        self._env = env

    def __getattr__(self, func_name):
        """
        Look up ``func_name`` in the wrapped namespace.

        Python only calls this after normal attribute lookup has failed.

        Raises
        ------
        AttributeError
            If the name is not present in the namespace.
        """
        # Read ``_env`` through ``__dict__``: ``self._env`` would re-enter
        # ``__getattr__`` and recurse without bound whenever the attribute
        # is not set yet (e.g. while ``copy``/``pickle`` rebuild the
        # instance without running ``__init__``).
        env = self.__dict__.get("_env")
        if env is not None and func_name in env:
            return env[func_name]
        raise AttributeError(f"Function '{func_name}' not found in R namespace")

    def list_functions(self):
        """
        Return a list of callable functions in this namespace.
        """
        return [name for name, value in self._env.items() if callable(value)]
211
315
 
212
316
 
213
317
  # ---------------------------------------------------------------------
@@ -215,24 +319,93 @@ def activate_renv(path_to_renv: Path) -> None:
215
319
  # ---------------------------------------------------------------------
216
320
  class RFunctionCaller:
217
321
  """
218
- Utility to load and call R functions from a script, lazily loading rpy2 and activating renv.
322
+ Primary interface for calling R functions from Python.
323
+
324
+ ``RFunctionCaller`` loads one or more R scripts into isolated namespaces
325
+ and provides a unified ``call()`` method for executing:
326
+
327
+ * Functions defined in sourced R scripts
328
+ * Base R functions (e.g. ``sum``, ``mean``)
329
+ * Functions from installed R packages (via ``package::function``)
219
330
 
220
- Supports:
221
- - Scripts with custom functions
222
- - Base R functions
223
- - Functions in installed packages
224
- - Automatic conversion of Python types (lists, dicts, scalars, pandas DataFrames) to R objects
331
+ In most workflows, users only need to interact with this class.
332
+
333
+ Parameters
334
+ ----------
335
+ path_to_renv : Path or None, optional
336
+ Path to an R project that uses ``renv``. This may be either the project
337
+ root or the ``renv/`` directory itself. If provided, the renv
338
+ environment is activated before any scripts are sourced.
339
+
340
+ scripts : str, Path, list[str | Path], or None, optional
341
+ One or more ``.R`` files or directories containing ``.R`` files.
342
+ Each script is sourced into its own namespace.
343
+
344
+ packages : str or list[str], optional
345
+ R packages to load (and install if missing) before calling functions.
346
+
347
+ Notes
348
+ -----
349
+ * Python objects are automatically converted to R objects.
350
+ * R return values are converted back to Python equivalents.
351
+ * Missing values (``None``, ``pd.NA``) are mapped to R ``NA``.
225
352
  """
226
353
 
227
354
  def __init__(
228
355
  self,
229
356
  path_to_renv: Path | None = None,
230
- script_path: Path | None = None,
231
- packages: list[str] | None = None,
357
+ scripts: str | Path | list[str | Path] | None = None,
358
+ packages: str | list[str] | None = None,
359
+ **kwargs, # catch unexpected keywords
232
360
  ):
361
+ # --- Handle deprecated 'script_path' ---
362
+ if "script_path" in kwargs:
363
+ script_path_value = kwargs.pop("script_path")
364
+ warnings.warn(
365
+ "'script_path' argument is deprecated. "
366
+ "Please use 'scripts' instead (accepts a Path or list of Paths).",
367
+ DeprecationWarning,
368
+ stacklevel=2,
369
+ )
370
+ if scripts is None:
371
+ scripts = script_path_value
372
+ else:
373
+ # Both provided → prioritize scripts and ignore script_path
374
+ logger.warning(
375
+ "'script_path' ignored because 'scripts' argument is also provided."
376
+ )
377
+
378
+ self.scripts = _normalize_scripts(scripts)
379
+
380
+ # --- Check all scripts exist immediately ---
381
+ for script_path in self.scripts:
382
+ if not script_path.exists():
383
+ raise FileNotFoundError(f"R script path not found: {script_path}")
384
+
385
+ # Raise error if other unexpected kwargs remain
386
+ if kwargs:
387
+ raise TypeError(
388
+ f"RFunctionCaller.__init__() received unexpected keyword arguments: {list(kwargs.keys())}"
389
+ )
390
+
233
391
  self.path_to_renv = path_to_renv.resolve() if path_to_renv else None
234
- self.script_path = script_path.resolve() if script_path else None
235
- self.packages = packages or None
392
+ self._namespaces: dict[str, Any] = {}
393
+
394
+ # Normalize scripts to a list
395
+ if scripts is None:
396
+ self.scripts: list[Path] = []
397
+ elif isinstance(scripts, Path):
398
+ self.scripts = [scripts.resolve()]
399
+ else:
400
+ self.scripts = [s.resolve() for s in scripts]
401
+
402
+ # Normalize packages to a list
403
+ if packages is None:
404
+ self.packages: list[str] = []
405
+ elif isinstance(packages, str):
406
+ self.packages = [packages]
407
+ else:
408
+ self.packages = packages
236
409
 
237
410
  # Lazy-loaded attributes
238
411
  self._r = None
@@ -247,76 +420,120 @@ class RFunctionCaller:
247
420
  self.ListVector = None
248
421
  self.NamedList = None
249
422
 
250
- if self.script_path and not self.script_path.exists():
251
- raise FileNotFoundError(f"R script not found: {self.script_path}")
252
-
253
- self.script_dir = self.script_path.parent if self.script_path else None
254
- self._script_loaded = False
423
+ # Internal state
255
424
  self._renv_activated = False
256
425
  self._packages_loaded = False
426
+ self._scripts_loaded = [False] * len(self.scripts)
257
427
 
258
428
  # -----------------------------------------------------------------
259
429
  # Internal: lazy R loading
260
430
  # -----------------------------------------------------------------
261
- def _ensure_r_loaded(self):
262
- if self._r is None:
263
- r = _require_rpy2(raise_on_missing=True)
264
- self._r = r
265
- self.ro = r["ro"]
266
- self.robjects = r["robjects"]
267
- self.pandas2ri = r["pandas2ri"]
268
- self.localconverter = r["localconverter"]
269
- self.IntVector = r["IntVector"]
270
- self.FloatVector = r["FloatVector"]
271
- self.BoolVector = r["BoolVector"]
272
- self.StrVector = r["StrVector"]
273
- self.ListVector = r["ListVector"]
274
- self.NamedList = r["NamedList"]
275
-
276
- # Activate renv
277
- if self.path_to_renv and not self._renv_activated:
278
- activate_renv(self.path_to_renv)
279
- self._renv_activated = True
280
-
281
- # Load packages
282
- if self.packages and not self._packages_loaded:
283
- for pkg in self.packages:
284
- try:
285
- self.robjects.r(f'suppressMessages(library("{pkg}"))')
286
- except Exception:
287
- logger.info(f"Package '{pkg}' not found. Installing...")
288
- self.robjects.r(
289
- f'install.packages("{pkg}", repos="https://cloud.r-project.org")'
431
+ def _ensure_r_loaded(self) -> None:
432
+ """
433
+ Ensure R runtime is initialized and all configured R scripts
434
+ are sourced exactly once, in isolated environments.
435
+ """
436
+ if self.robjects is None:
437
+ rpy2_dict = _ensure_rpy2()
438
+ self._RPY2 = rpy2_dict # cache in instance
439
+ self._r = rpy2_dict["ro"]
440
+ self.ro = rpy2_dict["robjects"]
441
+ self.robjects = rpy2_dict["robjects"]
442
+ self.pandas2ri = rpy2_dict["pandas2ri"]
443
+ self.localconverter = rpy2_dict["localconverter"]
444
+ self.IntVector = rpy2_dict["IntVector"]
445
+ self.FloatVector = rpy2_dict["FloatVector"]
446
+ self.BoolVector = rpy2_dict["BoolVector"]
447
+ self.StrVector = rpy2_dict["StrVector"]
448
+ self.ListVector = rpy2_dict["ListVector"]
449
+ self.NamedList = rpy2_dict["NamedList"]
450
+
451
+ r = self.robjects.r
452
+
453
+ # Ensure required R package
454
+ self.ensure_r_package("withr")
455
+
456
+ if not hasattr(self, "_namespaces"):
457
+ self._namespaces: dict[str, dict[str, Any]] = {}
458
+
459
+ # --- Iterate over scripts ---
460
+ for idx, script_entry in enumerate(self.scripts):
461
+ if self._scripts_loaded[idx]:
462
+ continue
463
+
464
+ script_entry = script_entry.resolve()
465
+
466
+ if script_entry.is_file():
467
+ r_files = [script_entry]
468
+ elif script_entry.is_dir():
469
+ r_files = sorted(script_entry.glob("*.R"))
470
+ if not r_files:
471
+ logger.warning(f"No .R files found in directory: {script_entry}")
472
+ self._scripts_loaded[idx] = True
473
+ continue
474
+ else:
475
+ raise ValueError(f"Invalid script path: {script_entry}")
476
+
477
+ for script_path in r_files:
478
+ ns_name = script_path.stem
479
+ logger.opt(depth=2).info(
480
+ "[rpy-bridge.RFunctionCaller] Loading R script '{}' as namespace '{}'",
481
+ script_path.name,
482
+ ns_name,
483
+ )
484
+
485
+ r("env <- new.env(parent=globalenv())")
486
+ r(f'script_path <- "{script_path.as_posix()}"')
487
+
488
+ r(
489
+ """
490
+ withr::with_dir(
491
+ dirname(script_path),
492
+ sys.source(basename(script_path), envir=env)
290
493
  )
291
- self.robjects.r(f'suppressMessages(library("{pkg}"))')
292
- self._packages_loaded = True
494
+ """
495
+ )
496
+
497
+ env_obj = r("env")
498
+ self._namespaces[ns_name] = {
499
+ name: env_obj[name]
500
+ for name in env_obj.keys()
501
+ if callable(env_obj[name])
502
+ }
503
+
504
+ logger.info(
505
+ f"[rpy-bridge.RFunctionCaller] Registered {len(self._namespaces[ns_name])} functions in namespace '{ns_name}'"
506
+ )
293
507
 
294
- # Source script
295
- if self.script_path and not self._script_loaded:
296
- self.robjects.r(f'setwd("{self.script_dir.as_posix()}")')
297
- self.robjects.r(f'source("{self.script_path.as_posix()}")')
298
- logger.info(f"R script sourced: {self.script_path.name}")
299
- self._script_loaded = True
508
+ self._scripts_loaded[idx] = True
509
+
510
+ # -----------------------------------------------------------------
511
+ # Autocomplete-friendly attribute access for script namespaces
512
+ # -----------------------------------------------------------------
513
def __getattr__(self, name: str):
    """
    Expose a registered script namespace as an attribute.

    Returns a ``NamespaceWrapper`` around the namespace stored under
    ``name``; raises ``AttributeError`` for any other name.
    """
    # Go through the instance dict so that a missing ``_namespaces`` does
    # not re-enter this method.
    instance_state = self.__dict__
    if "_namespaces" not in instance_state or name not in self._namespaces:
        raise AttributeError(f"'RFunctionCaller' object has no attribute '{name}'")
    return NamespaceWrapper(self._namespaces[name])
300
518
 
301
519
  def _clean_scalar(self, x):
302
520
  """
303
521
  Clean R-style missing values to pandas/NumPy equivalents.
304
522
  Called inside _r2py on each vector element; atomic/scalar only.
305
523
  """
306
- r = self._r
307
- ro = r["robjects"]
524
+ robjects = self.robjects
308
525
 
309
526
  if x is None:
310
527
  return None
311
528
 
312
529
  if x in (
313
- getattr(ro, "NA_Real", None),
314
- getattr(ro, "NA_Integer", None),
315
- getattr(ro, "NA_Logical", None),
530
+ getattr(robjects, "NA_Real", None),
531
+ getattr(robjects, "NA_Integer", None),
532
+ getattr(robjects, "NA_Logical", None),
316
533
  ):
317
534
  return None
318
535
 
319
- if x is getattr(ro, "NA_Character", None):
536
+ if x is getattr(robjects, "NA_Character", None):
320
537
  return None
321
538
 
322
539
  if isinstance(x, float) and np.isnan(x):
@@ -324,6 +541,105 @@ class RFunctionCaller:
324
541
 
325
542
  return x
326
543
 
544
def list_namespaces(self) -> list[str]:
    """
    List the script namespaces registered on this caller.

    ``_ensure_r_loaded()`` is invoked first.

    Returns
    -------
    list[str]
        The keys of the namespace registry, in insertion order.
    """
    self._ensure_r_loaded()
    registry = self._namespaces
    return list(registry)
555
+
556
def list_namespace_functions(self, namespace: str) -> list[str]:
    """
    List the names of the callable members of one script namespace.

    ``_ensure_r_loaded()`` is invoked first.

    Raises
    ------
    ValueError
        If ``namespace`` is not a registered namespace name.
    """
    self._ensure_r_loaded()
    registry = self._namespaces
    if namespace not in registry:
        raise ValueError(f"Namespace '{namespace}' not found")
    members = registry[namespace]
    return [member_name for member_name, member in members.items() if callable(member)]
564
+
565
def _get_package_functions(self, pkg: str) -> list[str]:
    """
    Return the names of the functions listed for an R package.

    Parameters
    ----------
    pkg : str
        Package name; interpolated into ``ls("package:<pkg>")`` and
        ``asNamespace("<pkg>")``.

    Returns
    -------
    list[str]
        Names for which R's ``is.function`` reports true. If any R call
        fails, a warning is logged and an empty list is returned.
    """
    r = self.robjects.r
    try:
        # Evaluate the R code by *calling* ``r``. Indexing (``r[...]``)
        # looks up an R object with that literal name instead, which always
        # failed here and made this method return [] for every package.
        # NOTE(review): ``ls("package:<pkg>")`` presumably only resolves for
        # attached packages -- confirm against how callers pick ``pkg``.
        all_objs = list(r(f'ls("package:{pkg}")'))
        return [
            name
            for name in all_objs
            if r(f'is.function(get("{name}", envir=asNamespace("{pkg}")))')[0]
        ]
    except Exception:
        logger.warning(f"Failed to list functions for package '{pkg}'")
        return []
581
+
582
def list_all_functions(
    self, include_packages: bool = False
) -> dict[str, list[str]]:
    """
    Return all callable R functions grouped by script namespace and package.

    Parameters
    ----------
    include_packages : bool, default False
        Also add one entry per name returned by R's ``loadedNamespaces()``.

    Returns
    -------
    dict[str, list[str]]
        Maps each script namespace (and, optionally, package) name to its
        function names. A package for which no functions could be listed
        maps to a single placeholder string instead.
    """
    self._ensure_r_loaded()

    # --- Script namespaces ---
    all_funcs: dict[str, list[str]] = {
        ns_name: [name for name, val in ns_env.items() if callable(val)]
        for ns_name, ns_env in self._namespaces.items()
    }

    # --- Loaded R packages ---
    if include_packages:
        r = self.robjects.r
        try:
            pkgs = r("loadedNamespaces()")
            for pkg in pkgs:
                funcs = self._get_package_functions(pkg)
                if not funcs:
                    # Add a placeholder note
                    funcs = [
                        "[See official documentation for functions, datasets, and objects]"
                    ]
                all_funcs[pkg] = funcs
        except Exception as exc:
            # Package listing stays best-effort: whatever was collected so
            # far is still returned, but the failure is no longer silent.
            logger.warning(
                f"[rpy-bridge.RFunctionCaller] Failed to list loaded R packages: {exc}"
            )

    return all_funcs
613
+
614
def print_function_tree(
    self, include_packages: bool = False, max_display: int = 10
):
    """
    Print the available R functions, one group per namespace.

    Parameters
    ----------
    include_packages : bool, default False
        Forwarded to ``list_all_functions``.

    max_display : int, default 10
        Number of (sorted) function names printed per group; a ``...``
        line follows when a group holds more names than that.

    Notes
    -----
    Groups without any function are not printed.
    """
    grouped = self.list_all_functions(include_packages=include_packages)

    for group_name, names in grouped.items():
        if names:
            print(f"{group_name}/")
            shown = sorted(names)[:max_display]
            for fn_name in shown:
                print(f" {fn_name}")
            truncated = len(names) > max_display
            if truncated:
                print(" ...")
642
+
327
643
  # -----------------------------------------------------------------
328
644
  # Python -> R conversion
329
645
  # -----------------------------------------------------------------
@@ -340,92 +656,56 @@ class RFunctionCaller:
340
656
  StrVector = self.StrVector
341
657
  ListVector = self.ListVector
342
658
  localconverter = self.localconverter
343
- import pandas as pd
344
- import rpy2.robjects.vectors as rvec
345
-
346
- # Pass through existing R objects
347
- if isinstance(
348
- obj,
349
- (
350
- rvec.IntVector,
351
- rvec.FloatVector,
352
- rvec.BoolVector,
353
- rvec.StrVector,
354
- rvec.ListVector,
355
- robjects.DataFrame,
356
- ),
357
- ):
659
+
660
+ r_types = (
661
+ robjects.vectors.IntVector,
662
+ robjects.vectors.FloatVector,
663
+ robjects.vectors.BoolVector,
664
+ robjects.vectors.StrVector,
665
+ robjects.vectors.ListVector,
666
+ robjects.DataFrame,
667
+ )
668
+ if isinstance(obj, r_types):
358
669
  return obj
359
670
 
671
+ def is_na(x):
672
+ return x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x))
673
+
360
674
  with localconverter(robjects.default_converter + pandas2ri.converter):
361
- if obj is None or obj is pd.NA:
675
+ if is_na(obj):
362
676
  return robjects.NULL
363
-
364
- # DataFrame → data.frame
365
677
  if isinstance(obj, pd.DataFrame):
366
678
  return pandas2ri.py2rpy(obj)
367
-
368
- # Series → vector
369
679
  if isinstance(obj, pd.Series):
370
680
  return self._py2r(obj.tolist())
371
-
372
- # Scalars
373
681
  if isinstance(obj, (int, float, bool, str)):
374
682
  return obj
375
-
376
- # Lists
377
683
  if isinstance(obj, list):
378
684
  if len(obj) == 0:
379
685
  return FloatVector([])
380
- elif all(isinstance(x, (int, float)) or x is None for x in obj):
381
- return FloatVector(
382
- [robjects.NA_Real if x is None else float(x) for x in obj]
383
- )
384
-
385
- def is_na(x):
386
- return (
387
- x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x))
388
- )
389
686
 
390
- # Homogeneous numeric
391
- if all(
392
- isinstance(x, (int, float)) and not isinstance(x, bool) or is_na(x)
393
- for x in obj
394
- ):
687
+ types = set(type(x) for x in obj if not is_na(x))
688
+ if types <= {int, float}:
395
689
  return FloatVector(
396
690
  [robjects.NA_Real if is_na(x) else float(x) for x in obj]
397
691
  )
398
-
399
- # Homogeneous bool
400
- if all(isinstance(x, bool) or is_na(x) for x in obj):
692
+ if types <= {bool}:
401
693
  return BoolVector(
402
694
  [robjects.NA_Logical if is_na(x) else x for x in obj]
403
695
  )
404
-
405
- # Homogeneous str
406
- if all(isinstance(x, str) or is_na(x) for x in obj):
696
+ if types <= {str}:
407
697
  return StrVector(
408
698
  [robjects.NA_Character if is_na(x) else x for x in obj]
409
699
  )
410
-
411
- # Mixed or nested list → ListVector with positional keys
412
700
  return ListVector({str(i): self._py2r(v) for i, v in enumerate(obj)})
413
-
414
- # Dict → NamedList
415
701
  if isinstance(obj, dict):
416
702
  return ListVector({k: self._py2r(v) for k, v in obj.items()})
417
-
418
703
  raise NotImplementedError(f"Cannot convert Python object to R: {type(obj)}")
419
704
 
420
705
  # -----------------------------------------------------------------
421
706
  # R -> Python conversion
422
707
  # -----------------------------------------------------------------
423
708
  def _r2py(self, obj, top_level=True):
424
- """
425
- Convert R objects to Python objects robustly.
426
- Handles DataFrames, NamedList/ListVector, atomic vectors, and NULL.
427
- """
428
- r = self._r
429
709
  robjects = self.robjects
430
710
  NamedList = self.NamedList
431
711
  ListVector = self.ListVector
@@ -433,7 +713,7 @@ class RFunctionCaller:
433
713
  IntVector = self.IntVector
434
714
  FloatVector = self.FloatVector
435
715
  BoolVector = self.BoolVector
436
- NULLType = r["NULLType"]
716
+ NULLType = self._RPY2["NULLType"]
437
717
  lc = self.localconverter
438
718
  pandas2ri = self.pandas2ri
439
719
 
@@ -444,12 +724,10 @@ class RFunctionCaller:
444
724
  with lc(robjects.default_converter + pandas2ri.converter):
445
725
  df = robjects.conversion.rpy2py(obj)
446
726
  df = postprocess_r_dataframe(df)
447
- df = clean_r_missing(df, caller=self)
448
- return df
727
+ return clean_r_missing(df, caller=self)
449
728
 
450
729
  if isinstance(obj, (NamedList, ListVector)):
451
730
  py_obj = r_namedlist_to_dict(obj, caller=self, top_level=top_level)
452
- # Auto-unpack single-element lists only at top-level
453
731
  if isinstance(py_obj, list) and len(py_obj) == 1 and top_level:
454
732
  return py_obj[0]
455
733
  return py_obj
@@ -465,58 +743,114 @@ class RFunctionCaller:
465
743
  # -----------------------------------------------------------------
466
744
  # Public: ensure R package is available
467
745
  # -----------------------------------------------------------------
468
- def ensure_r_package(self, pkg_name: str):
746
+ def ensure_r_package(self, pkg: str):
469
747
  r = self.robjects.r
470
748
  try:
471
- r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
749
+ r(f'suppressMessages(library("{pkg}", character.only=TRUE))')
472
750
  except Exception:
473
- r(f'install.packages("{pkg_name}", repos="https://cloud.r-project.org")')
474
- r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
751
+ logger.info(f"[rpy-bridge.RFunctionCaller] Package '{pkg}' not found.")
752
+ logger.warning(
753
+ f"[rpy-bridge.RFunctionCaller] Installing missing R package: {pkg}"
754
+ )
755
+ r(f'install.packages("{pkg}", repos="https://cloud.r-project.org")')
756
+ r(f'suppressMessages(library("{pkg}", character.only=TRUE))')
475
757
 
476
758
  # -----------------------------------------------------------------
477
759
  # Public: call an R function
478
760
  # -----------------------------------------------------------------
479
761
  def call(self, func_name: str, *args, **kwargs):
480
762
  """
481
- Call an R function safely. Supports:
482
- - functions defined in scripts
483
- - base R functions
484
- - functions in loaded packages
763
+ Call an R function.
764
+
765
+ The function may be defined in:
766
+ * a sourced R script
767
+ * an installed R package (using ``package::function`` syntax)
768
+ * base R
769
+
770
+ Parameters
771
+ ----------
772
+ func_name : str
773
+ Name of the R function to call. Package functions should be specified
774
+ as ``package::function``.
775
+
776
+ *args
777
+ Positional arguments passed to the R function.
778
+
779
+ **kwargs
780
+ Named arguments passed to the R function.
781
+
782
+ Returns
783
+ -------
784
+ object
785
+ The result of the R function, converted to a Python object.
786
+
787
+ Examples
788
+ --------
789
+ >>> rfc.call("sum", [1, 2, 3])
790
+ >>> rfc.call("dplyr::n_distinct", [1, 2, 2, 3])
791
+ >>> rfc.call("add_and_scale", 2, 3, scale=10)
485
792
  """
793
+
486
794
  self._ensure_r_loaded()
487
795
 
488
- # --- Find the function ---
489
796
  func = None
490
- try:
491
- func = self.robjects.globalenv[func_name] # script-defined
492
- except KeyError:
493
- try:
494
- func = self.robjects.r[func_name] # base or package function
495
- except KeyError:
496
- # --- Added: handle namespaced functions like stats::median ---
497
- if "::" in func_name:
498
- pkg, fname = func_name.split("::", 1)
499
- try:
500
- func = self.robjects.r(f"{pkg}::{fname}")
501
- except Exception as e:
502
- raise RuntimeError(
503
- f"Failed to load R function '{func_name}' via namespace: {e}"
504
- ) from e
505
-
506
- if func is None:
507
- raise ValueError(f"R function '{func_name}' not found.")
508
-
509
- # --- Convert Python args to R ---
797
+ source_info = None
798
+
799
+ if "::" in func_name:
800
+ ns_name, fname = func_name.split("::", 1)
801
+ if ns_name in self._namespaces:
802
+ ns_env = self._namespaces[ns_name]
803
+ if fname in ns_env:
804
+ func = ns_env[fname]
805
+ source_info = f"script namespace '{ns_name}'"
806
+ else:
807
+ raise ValueError(
808
+ f"Function '{fname}' not found in R script namespace '{ns_name}'"
809
+ )
810
+ else:
811
+ try:
812
+ func = self.robjects.r(f"{ns_name}::{fname}")
813
+ source_info = f"R package '{ns_name}'"
814
+ except Exception as e:
815
+ raise RuntimeError(
816
+ f"Failed to resolve R function '{func_name}': {e}"
817
+ ) from e
818
+
819
+ else:
820
+ for ns_name, ns_env in self._namespaces.items():
821
+ if func_name in ns_env:
822
+ func = ns_env[func_name]
823
+ source_info = f"script namespace '{ns_name}'"
824
+ break
825
+
826
+ if func is None:
827
+ try:
828
+ func = self.robjects.globalenv[func_name]
829
+ source_info = "global environment"
830
+ except KeyError:
831
+ pass
832
+
833
+ if func is None:
834
+ try:
835
+ func = self.robjects.r[func_name]
836
+ source_info = "base R / loaded package"
837
+ except KeyError:
838
+ raise ValueError(
839
+ f"R function '{func_name}' not found in any namespace, global env, or base R."
840
+ )
841
+
510
842
  r_args = [self._py2r(a) for a in args]
511
843
  r_kwargs = {k: self._py2r(v) for k, v in kwargs.items()}
512
844
 
513
- # --- Call safely ---
514
845
  try:
515
846
  result = func(*r_args, **r_kwargs)
516
847
  except Exception as e:
517
- raise RuntimeError(f"Error calling R function '{func_name}': {e}")
848
+ raise RuntimeError(
849
+ f"Error calling R function '{func_name}' from {source_info}: {e}"
850
+ ) from e
851
+
852
+ _log_r_call(func_name, source_info)
518
853
 
519
- # --- Convert R result back to Python ---
520
854
  return self._r2py(result)
521
855
 
522
856
 
@@ -525,10 +859,6 @@ class RFunctionCaller:
525
859
  # Utility functions for R ↔ Python
526
860
  # ------------------------------
527
861
  def r_namedlist_to_dict(namedlist, caller: RFunctionCaller, top_level=False):
528
- """
529
- Recursively convert an R NamedList or ListVector to a Python dictionary.
530
- Uses the caller._r2py method for nested conversions.
531
- """
532
862
  r = _ensure_rpy2()
533
863
  NamedList = r["NamedList"]
534
864
  ListVector = r["ListVector"]
@@ -536,31 +866,24 @@ def r_namedlist_to_dict(namedlist, caller: RFunctionCaller, top_level=False):
536
866
  if isinstance(namedlist, (NamedList, ListVector)):
537
867
  names = namedlist.names if not callable(namedlist.names) else namedlist.names()
538
868
 
539
- # Detect positional (unnamed) list
540
869
  if names and all(str(i) == str(name) for i, name in enumerate(names)):
541
870
  out = []
542
871
  for v in namedlist:
543
- # Nested elements are never top-level
544
872
  val = caller._r2py(v, top_level=False)
545
873
  out.append(val)
546
874
  return out
547
875
 
548
- # Otherwise dict
549
876
  result = {}
550
877
  for i, val in enumerate(namedlist):
551
878
  key = names[i] if names and i < len(names) else str(i)
552
- v_py = caller._r2py(val, top_level=False) # nested elements
879
+ v_py = caller._r2py(val, top_level=False)
553
880
  result[str(key)] = v_py
554
881
  return result
555
882
 
556
- # Fallback: scalar/vector at the very top
557
883
  return caller._r2py(namedlist, top_level=top_level)
558
884
 
559
885
 
560
886
  def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
561
- """
562
- Clean an R data.frame by removing non-structural attributes like .groups and .rows.
563
- """
564
887
  for attr in [".groups", ".rows"]:
565
888
  try:
566
889
  del r_df.attrs[attr]
@@ -570,18 +893,11 @@ def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
570
893
 
571
894
 
572
895
  def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
573
- """
574
- Replace string NAs or empty strings with pd.NA.
575
- """
576
896
  return df.replace(["nan", "NaN", "NA", "na", ""], pd.NA)
577
897
 
578
898
 
579
899
  def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
580
- """
581
- Normalize dtypes in a single DataFrame after R conversion.
582
- """
583
900
  df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)
584
-
585
901
  for col in df.columns:
586
902
  series = df[col]
587
903
  if pd.api.types.is_object_dtype(series):
@@ -594,18 +910,10 @@ def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
594
910
 
595
911
 
596
912
  def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
597
- """
598
- Post-process R DataFrame:
599
- - Convert R NA_integer_ sentinel (-2147483648) to pd.NA
600
- - Convert R-style numeric dates to datetime
601
- - Remove timezone from datetime columns
602
- """
603
913
  for col in df.columns:
604
914
  series = df[col]
605
-
606
915
  if pd.api.types.is_integer_dtype(series):
607
916
  df[col] = series.mask(series == -2147483648, pd.NA)
608
-
609
917
  if pd.api.types.is_numeric_dtype(series):
610
918
  values = series.dropna()
611
919
  if not values.empty and values.between(10000, 40000).all():
@@ -615,24 +923,15 @@ def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
615
923
  )
616
924
  except Exception:
617
925
  pass
618
-
619
926
  if pd.api.types.is_datetime64tz_dtype(series):
620
927
  df[col] = series.dt.tz_localize(None)
621
-
622
928
  return df
623
929
 
624
930
 
625
931
  def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
626
- """
627
- Apply a series of fixes to a DataFrame converted from R:
628
- - Type corrections
629
- - String NA normalization
630
- - Index normalization
631
- """
632
932
  df = fix_r_dataframe_types(df)
633
933
  df = fix_string_nans(df)
634
934
  df = normalize_single_df_dtypes(df)
635
-
636
935
  if df.index.dtype == object:
637
936
  try:
638
937
  int_index = df.index.astype(int)
@@ -644,62 +943,37 @@ def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
644
943
 
645
944
 
646
945
  def clean_r_missing(obj, caller: RFunctionCaller):
647
- """
648
- Post-process R return objects for downstream Python use.
649
- Recursively convert R-style missing values to pandas/NumPy:
650
- - NA_integer_, NA_real_, NA_logical_ → np.nan
651
- - NA_character_ → pd.NA
652
- """
653
- r = _ensure_rpy2()
654
- ro = r["robjects"]
655
-
946
+ robjects = caller.robjects
656
947
  NA_MAP = {
657
- getattr(ro, "NA_Real", None): np.nan,
658
- getattr(ro, "NA_Integer", None): np.nan,
659
- getattr(ro, "NA_Logical", None): np.nan,
660
- getattr(ro, "NA_Character", None): pd.NA,
948
+ getattr(robjects, "NA_Real", None): np.nan,
949
+ getattr(robjects, "NA_Integer", None): np.nan,
950
+ getattr(robjects, "NA_Logical", None): np.nan,
951
+ getattr(robjects, "NA_Character", None): pd.NA,
661
952
  }
662
953
 
663
954
  if isinstance(obj, pd.DataFrame):
664
955
  for col in obj.columns:
665
956
  obj[col] = obj[col].apply(lambda x: clean_r_missing(x, caller))
666
957
  return obj
667
-
668
958
  elif isinstance(obj, dict):
669
959
  return {k: clean_r_missing(v, caller) for k, v in obj.items()}
670
-
671
960
  elif isinstance(obj, list):
672
961
  return [clean_r_missing(v, caller) for v in obj]
673
-
674
962
  else:
675
963
  return NA_MAP.get(obj, obj)
676
964
 
677
965
 
678
- # %%
679
- # -------------------------------------------
680
- # Functions here onwards are utility functions
681
- # for comparing R and Python DataFrames.
682
- # -------------------------------------------
683
-
684
-
966
+ # ---------------------------------------------------------------------
967
+ # DataFrame comparison utilities
968
+ # ---------------------------------------------------------------------
685
969
  def normalize_dtypes(
686
970
  df1: pd.DataFrame, df2: pd.DataFrame
687
971
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
688
- """
689
- Aligns column dtypes across two DataFrames for accurate comparison.
690
- - Replaces empty strings with pd.NA.
691
- - Attempts to coerce strings to numeric where applicable.
692
- - Aligns dtypes between matching columns (e.g. float64 vs int64).
693
- """
694
972
  for col in df1.columns.intersection(df2.columns):
695
- # Replace empty strings with NA
696
973
  df1[col] = df1[col].replace("", pd.NA)
697
974
  df2[col] = df2[col].replace("", pd.NA)
698
-
699
975
  s1, s2 = df1[col], df2[col]
700
976
  dtype1, dtype2 = s1.dtype, s2.dtype
701
-
702
- # If one is numeric and the other is object, try coercing both to numeric
703
977
  if (
704
978
  pd.api.types.is_numeric_dtype(dtype1)
705
979
  and pd.api.types.is_object_dtype(dtype2)
@@ -710,98 +984,57 @@ def normalize_dtypes(
710
984
  try:
711
985
  df1[col] = pd.to_numeric(s1, errors="coerce")
712
986
  df2[col] = pd.to_numeric(s2, errors="coerce")
713
- continue # skip to next column if coercion succeeds
987
+ continue
714
988
  except Exception:
715
- pass # fallback to next block if coercion fails
716
-
717
- # If both are numeric but of different types (e.g., int vs float), unify to float64
989
+ pass
718
990
  if pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_numeric_dtype(
719
991
  dtype2
720
992
  ):
721
993
  df1[col] = df1[col].astype("float64")
722
994
  df2[col] = df2[col].astype("float64")
723
995
  continue
724
-
725
- # If both are objects or strings, convert both to str for equality comparison
726
996
  if pd.api.types.is_object_dtype(dtype1) or pd.api.types.is_object_dtype(dtype2):
727
997
  df1[col] = df1[col].astype(str)
728
998
  df2[col] = df2[col].astype(str)
729
-
730
999
  return df1, df2
731
1000
 
732
1001
 
733
- # %%
734
1002
  def align_numeric_dtypes(
735
1003
  df1: pd.DataFrame, df2: pd.DataFrame
736
1004
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
737
- """
738
- Ensure aligned numeric dtypes between two DataFrames for accurate comparison.
739
- Converts between int, float, and numeric-looking strings where appropriate.
740
- Also handles NA and empty string normalization.
741
- """
742
1005
  for col in df1.columns.intersection(df2.columns):
743
- s1, s2 = df1[col], df2[col]
744
-
745
- # Replace empty strings with NA to avoid type promotion issues
746
- s1 = s1.replace("", pd.NA)
747
- s2 = s2.replace("", pd.NA)
748
-
749
- # Try to coerce both to numeric (non-destructive)
1006
+ s1, s2 = df1[col].replace("", pd.NA), df2[col].replace("", pd.NA)
750
1007
  try:
751
1008
  s1_num = pd.to_numeric(s1, errors="coerce")
752
1009
  s2_num = pd.to_numeric(s2, errors="coerce")
753
-
754
- # If at least one successfully converts and it's not all NaN
755
1010
  if not s1_num.isna().all() or not s2_num.isna().all():
756
1011
  df1[col] = s1_num.astype("float64")
757
1012
  df2[col] = s2_num.astype("float64")
758
- continue # move to next column
1013
+ continue
759
1014
  except Exception:
760
1015
  pass
761
-
762
- # Otherwise, fall back to original values
763
- df1[col] = s1
764
- df2[col] = s2
765
-
1016
+ df1[col], df2[col] = s1, s2
766
1017
  return df1, df2
767
1018
 
768
1019
 
769
- # %%
770
1020
  def compare_r_py_dataframes(
771
1021
  df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8
772
1022
  ) -> dict:
773
- """
774
- Compare a Python DataFrame (df1) with an R DataFrame converted to pandas (df2).
775
-
776
- Returns:
777
- dict with mismatch diagnostics, preserving original indices in diffs.
778
- """
779
-
780
1023
  results: dict[str, Any] = {
781
1024
  "shape_mismatch": False,
782
1025
  "columns_mismatch": False,
783
1026
  "index_mismatch": False,
784
- "numeric_diffs": {}, # type: dict[str, pd.DataFrame]
785
- "non_numeric_diffs": {}, # type: dict[str, pd.DataFrame]
1027
+ "numeric_diffs": {},
1028
+ "non_numeric_diffs": {},
786
1029
  }
787
-
788
- # --- Preprocessing: fix R-specific issues ---
789
1030
  df2 = fix_r_dataframe_types(df2)
790
-
791
- # --- Replace common string NAs with proper pd.NA ---
792
1031
  df1 = fix_string_nans(df1)
793
1032
  df2 = fix_string_nans(df2)
794
-
795
- # --- Normalize and align dtypes ---
796
1033
  df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
797
1034
  df1, df2 = align_numeric_dtypes(df1, df2)
798
-
799
- # --- Check shape ---
800
1035
  if df1.shape != df2.shape:
801
1036
  results["shape_mismatch"] = True
802
1037
  print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")
803
-
804
- # --- Check columns ---
805
1038
  if set(df1.columns) != set(df2.columns):
806
1039
  results["columns_mismatch"] = True
807
1040
  print("[Warning] Column mismatch:")
@@ -810,21 +1043,13 @@ def compare_r_py_dataframes(
810
1043
  common_cols = df1.columns.intersection(df2.columns)
811
1044
  else:
812
1045
  common_cols = df1.columns
813
-
814
- # --- Ensure columns are the same order ---
815
- df1_aligned = df1.loc[:, common_cols]
816
- df2_aligned = df2.loc[:, common_cols]
817
-
818
- # --- Compare values column by column ---
1046
+ df1_aligned, df2_aligned = df1.loc[:, common_cols], df2.loc[:, common_cols]
819
1047
  for col in common_cols:
820
- col_py = df1_aligned[col]
821
- col_r = df2_aligned[col]
822
-
1048
+ col_py, col_r = df1_aligned[col], df2_aligned[col]
823
1049
  if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(
824
1050
  col_r
825
1051
  ):
826
1052
  col_py, col_r = col_py.align(col_r)
827
-
828
1053
  close = np.isclose(
829
1054
  col_py.fillna(np.nan),
830
1055
  col_r.fillna(np.nan),
@@ -832,30 +1057,15 @@ def compare_r_py_dataframes(
832
1057
  equal_nan=True,
833
1058
  )
834
1059
  if not close.all():
835
- diffs = pd.DataFrame(
836
- {
837
- "df1": col_py[~close],
838
- "df2": col_r[~close],
839
- }
1060
+ results["numeric_diffs"][col] = pd.DataFrame(
1061
+ {"df1": col_py[~close], "df2": col_r[~close]}
840
1062
  )
841
- results["numeric_diffs"][col] = diffs
842
-
843
1063
  else:
844
- # Treat missing values as equal: create mask where values differ excluding matching NAs
845
1064
  unequal = ~col_py.eq(col_r)
846
1065
  both_na = col_py.isna() & col_r.isna()
847
1066
  unequal = unequal & ~both_na
848
-
849
1067
  if unequal.any():
850
- diffs = pd.DataFrame(
851
- {
852
- "df1": col_py[unequal],
853
- "df2": col_r[unequal],
854
- }
1068
+ results["non_numeric_diffs"][col] = pd.DataFrame(
1069
+ {"df1": col_py[unequal], "df2": col_r[unequal]}
855
1070
  )
856
- results["non_numeric_diffs"][col] = diffs
857
-
858
1071
  return results
859
-
860
-
861
- # %%
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rpy-bridge
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
5
5
  Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
6
  License: MIT License
@@ -150,12 +150,12 @@ uv sync
150
150
  from pathlib import Path
151
151
  from rpy_bridge import RFunctionCaller
152
152
 
153
- caller = RFunctionCaller(
153
+ rfc = RFunctionCaller(
154
154
  path_to_renv=Path("/path/to/project"),
155
- script_path=Path("/path/to/script.R"),
155
+ script=Path("/path/to/script.R"),
156
156
  )
157
157
 
158
- summary_df = caller.call("summarize_cohort", cohort_df)
158
+ summary_df = rfc.call("summarize_cohort", cohort_df)
159
159
  ```
160
160
 
161
161
  ---
@@ -0,0 +1,8 @@
1
+ rpy_bridge/__init__.py,sha256=VDCx-CiTBJO0cMp59v-gyJGBVYHjLjATTIdtYxBsK5Q,875
2
+ rpy_bridge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ rpy_bridge/rpy2_utils.py,sha256=1W1Lgt0HI3TGs20GugHceFih1uLTTilz_pmkzNkPujY,37516
4
+ rpy_bridge-0.3.5.dist-info/licenses/LICENSE,sha256=JwbWVcSfeoLfZ2M_ZiyygKVDvhBDW3zbqTWwXOJwmrA,1276
5
+ rpy_bridge-0.3.5.dist-info/METADATA,sha256=uZBsfC-lyYhYQfVvLJPGGi2XCwfq-8cSbILCFgmPAFs,9580
6
+ rpy_bridge-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ rpy_bridge-0.3.5.dist-info/top_level.txt,sha256=z9UZ77ZuUPoLqMDQEpP4btstsaM1IpXb9Cn9yBVaHmU,11
8
+ rpy_bridge-0.3.5.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rpy_bridge/__init__.py,sha256=1cyWVzhVnSqMRY6OkSo8RYjTKWjmaV9WR-otu4Y5dJc,829
2
- rpy_bridge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- rpy_bridge/rpy2_utils.py,sha256=n58oSoqkZRv320dtgxEW597G8PrzCO8jCeGPZQH_5t8,29234
4
- rpy_bridge-0.3.3.dist-info/licenses/LICENSE,sha256=JwbWVcSfeoLfZ2M_ZiyygKVDvhBDW3zbqTWwXOJwmrA,1276
5
- rpy_bridge-0.3.3.dist-info/METADATA,sha256=Frw8qT49nSrWClRKCMKfU8cvLQVVKvpz3By99rTB_3A,9591
6
- rpy_bridge-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- rpy_bridge-0.3.3.dist-info/top_level.txt,sha256=z9UZ77ZuUPoLqMDQEpP4btstsaM1IpXb9Cn9yBVaHmU,11
8
- rpy_bridge-0.3.3.dist-info/RECORD,,