rpy-bridge 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rpy-bridge
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
5
5
  Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
6
  License: MIT License
@@ -31,12 +31,17 @@ License: MIT License
31
31
 
32
32
  Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
33
33
  Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
34
+ Keywords: python,r,rpy2,python-r,interoperability,data-science,statistics,bioinformatics
34
35
  Classifier: License :: OSI Approved :: MIT License
35
36
  Classifier: Programming Language :: Python
36
37
  Classifier: Programming Language :: Python :: 3
37
38
  Classifier: Programming Language :: Python :: 3.11
38
39
  Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Intended Audience :: Developers
41
+ Classifier: Intended Audience :: Science/Research
39
42
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
43
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
44
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
45
  Requires-Python: >=3.11
41
46
  Description-Content-Type: text/markdown
42
47
  License-File: LICENSE
@@ -1,12 +1,24 @@
1
1
  [project]
2
2
  name = "rpy-bridge"
3
- version = "0.3.2"
3
+ version = "0.3.4"
4
4
  description = "Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution."
5
5
  readme = "README.md"
6
6
  license = { file = "LICENSE" }
7
7
  authors = [
8
8
  { name = "Victoria Cheung", email = "victoriakcheung@gmail.com" }
9
9
  ]
10
+
11
+ keywords = [
12
+ "python",
13
+ "r",
14
+ "rpy2",
15
+ "python-r",
16
+ "interoperability",
17
+ "data-science",
18
+ "statistics",
19
+ "bioinformatics",
20
+ ]
21
+
10
22
  requires-python = ">=3.11"
11
23
 
12
24
  dependencies = [
@@ -21,7 +33,12 @@ classifiers = [
21
33
  "Programming Language :: Python :: 3",
22
34
  "Programming Language :: Python :: 3.11",
23
35
  "Programming Language :: Python :: 3.12",
36
+ "Intended Audience :: Developers",
37
+ "Intended Audience :: Science/Research",
24
38
  "Topic :: Scientific/Engineering :: Bio-Informatics",
39
+ "Topic :: Scientific/Engineering :: Information Analysis",
40
+ "Topic :: Software Development :: Libraries :: Python Modules",
41
+
25
42
  ]
26
43
 
27
44
  [project.optional-dependencies]
@@ -6,6 +6,7 @@ continue importing directly from ``rpy_bridge``.
6
6
  """
7
7
 
8
8
  from .rpy2_utils import (
9
+ NamespaceWrapper,
9
10
  RFunctionCaller,
10
11
  activate_renv,
11
12
  align_numeric_dtypes,
@@ -23,6 +24,7 @@ from .rpy2_utils import (
23
24
  __all__ = [
24
25
  "activate_renv",
25
26
  "RFunctionCaller",
27
+ "NamespaceWrapper",
26
28
  "r_namedlist_to_dict",
27
29
  "clean_r_dataframe",
28
30
  "fix_string_nans",
@@ -1,10 +1,23 @@
1
1
  """
2
- Wrapper for calling R functions from Python using rpy2.
2
+ RPython Integration Utility
3
3
 
4
+ Provides tools to load R scripts, activate renv environments, and call R functions
5
+ directly from Python, with automatic conversion between R and Python data types.
6
+
7
+ ----------
8
+ Requirements
4
9
  ----------
5
- ** R must be installed and accessible in your environment **
6
- Ensure compatibility with your R project's renv setup (or other virtual env/base env if that's what you're using).
10
+ - R must be installed and accessible in your system environment.
11
+ - Ensure compatibility with your R project's renv setup (or any other R environment you use).
12
+
13
+ Features
7
14
  ----------
15
+ - Lazy loading of rpy2 and R runtime.
16
+ - Activation of renv environments for isolated R project dependencies.
17
+ - Support for sourcing individual R scripts or directories of scripts.
18
+ - Namespace-based access to R functions.
19
+ - Automatic conversion between R vectors, data frames, and Python types (pandas, lists, scalars).
20
+ - Utilities for cleaning and aligning data frames between R and Python.
8
21
  """
9
22
 
10
23
  # ruff: noqa: E402
@@ -29,7 +42,8 @@ if TYPE_CHECKING:
29
42
 
30
43
  from loguru import Logger as LoguruLogger
31
44
 
32
- LoggerType = LoggerType = Union[LoguruLogger, logging_module.Logger]
45
+ LoggerType = Union[LoguruLogger, logging_module.Logger]
46
+
33
47
  else:
34
48
  LoggerType = None # runtime doesn’t need the type object
35
49
 
@@ -64,7 +78,9 @@ def ensure_rpy2_available() -> None:
64
78
 
65
79
 
66
80
  def find_r_home() -> str | None:
67
- """Detect system R installation."""
81
+ """
82
+ Detect system R installation.
83
+ """
68
84
  try:
69
85
  r_home = subprocess.check_output(
70
86
  ["R", "--vanilla", "--slave", "-e", "cat(R.home())"],
@@ -88,9 +104,13 @@ def find_r_home() -> str | None:
88
104
  return None
89
105
 
90
106
 
91
- R_HOME = find_r_home()
92
- if not R_HOME:
93
- raise RuntimeError("R not found. Please install R or add it to PATH.")
107
+ if "R_HOME" not in os.environ:
108
+ R_HOME = find_r_home()
109
+ if not R_HOME:
110
+ raise RuntimeError("R not found. Please install R or add it to PATH.")
111
+ os.environ["R_HOME"] = R_HOME
112
+ else:
113
+ R_HOME = os.environ["R_HOME"]
94
114
 
95
115
  logger.info(f"R_HOME = {R_HOME}")
96
116
  os.environ["R_HOME"] = R_HOME
@@ -107,7 +127,9 @@ if sys.platform == "darwin":
107
127
  elif sys.platform.startswith("linux"):
108
128
  lib_path = os.path.join(R_HOME, "lib")
109
129
  ld_path = os.environ.get("LD_LIBRARY_PATH", "")
110
- os.environ["LD_LIBRARY_PATH"] = f"{lib_path}:{ld_path}"
130
+ if lib_path not in ld_path.split(":"):
131
+ os.environ["LD_LIBRARY_PATH"] = f"{lib_path}:{ld_path}"
132
+
111
133
 
112
134
  # ---------------------------------------------------------------------
113
135
  # Lazy rpy2 import machinery
@@ -210,29 +232,95 @@ def activate_renv(path_to_renv: Path) -> None:
210
232
  logger.info(f"renv environment loaded for project: {project_dir}")
211
233
 
212
234
 
235
+ # ---------------------------------------------------------------------
236
+ # NamespaceWrapper
237
+ # ---------------------------------------------------------------------
238
+ class NamespaceWrapper:
239
+ """
240
+ Wraps an R script namespace for Python attribute access.
241
+ """
242
+
243
+ def __init__(self, env):
244
+ self._env = env
245
+
246
+ def __getattr__(self, func_name):
247
+ if func_name in self._env:
248
+ return self._env[func_name]
249
+ raise AttributeError(f"Function '{func_name}' not found in R namespace")
250
+
251
+
213
252
  # ---------------------------------------------------------------------
214
253
  # RFunctionCaller
215
254
  # ---------------------------------------------------------------------
216
255
  class RFunctionCaller:
217
256
  """
218
- Utility to load and call R functions from a script, lazily loading rpy2 and activating renv.
257
+ Utility to load and call R functions from scripts, lazily loading rpy2 and activating renv.
219
258
 
220
259
  Supports:
221
- - Scripts with custom functions
260
+ - Single or multiple R scripts
261
+ - R script directories (sources all `.R` files inside)
222
262
  - Base R functions
223
- - Functions in installed packages
224
- - Automatic conversion of Python types (lists, dicts, scalars, pandas DataFrames) to R objects
263
+ - Functions in loaded packages
264
+ - Automatic conversion of Python types to R objects
265
+
266
+ Args:
267
+ scripts:
268
+ Path or list of Paths.
269
+ Each path may be:
270
+ - an R script (.R file)
271
+ - a directory containing R scripts (all *.R files are sourced)
272
+ - scripts in subdirectories are not automatically sourced
273
+
225
274
  """
226
275
 
227
276
  def __init__(
228
277
  self,
229
278
  path_to_renv: Path | None = None,
230
- script_path: Path | None = None,
231
- packages: list[str] | None = None,
279
+ scripts: Path | list[Path] | None = None,
280
+ packages: str | list[str] | None = None,
281
+ **kwargs, # catch unexpected keywords
232
282
  ):
283
+ # --- Handle deprecated 'script_path' ---
284
+ if "script_path" in kwargs:
285
+ script_path_value = kwargs.pop("script_path")
286
+ warnings.warn(
287
+ "'script_path' argument is deprecated. "
288
+ "Please use 'scripts' instead (accepts a Path or list of Paths).",
289
+ DeprecationWarning,
290
+ stacklevel=2,
291
+ )
292
+ if scripts is None:
293
+ scripts = script_path_value
294
+ else:
295
+ # Both provided → prioritize scripts and ignore script_path
296
+ logger.warning(
297
+ "'script_path' ignored because 'scripts' argument is also provided."
298
+ )
299
+
300
+ # Raise error if other unexpected kwargs remain
301
+ if kwargs:
302
+ raise TypeError(
303
+ f"RFunctionCaller.__init__() received unexpected keyword arguments: {list(kwargs.keys())}"
304
+ )
305
+
233
306
  self.path_to_renv = path_to_renv.resolve() if path_to_renv else None
234
- self.script_path = script_path.resolve() if script_path else None
235
- self.packages = packages or None
307
+ self._namespaces: dict[str, Any] = {}
308
+
309
+ # Normalize scripts to a list
310
+ if scripts is None:
311
+ self.scripts: list[Path] = []
312
+ elif isinstance(scripts, Path):
313
+ self.scripts = [scripts.resolve()]
314
+ else:
315
+ self.scripts = [s.resolve() for s in scripts]
316
+
317
+ # Normalize packages to a list
318
+ if packages is None:
319
+ self.packages: list[str] = []
320
+ elif isinstance(packages, str):
321
+ self.packages = [packages]
322
+ else:
323
+ self.packages = packages
236
324
 
237
325
  # Lazy-loaded attributes
238
326
  self._r = None
@@ -247,76 +335,118 @@ class RFunctionCaller:
247
335
  self.ListVector = None
248
336
  self.NamedList = None
249
337
 
250
- if self.script_path and not self.script_path.exists():
251
- raise FileNotFoundError(f"R script not found: {self.script_path}")
252
-
253
- self.script_dir = self.script_path.parent if self.script_path else None
254
- self._script_loaded = False
338
+ # Internal state
255
339
  self._renv_activated = False
256
340
  self._packages_loaded = False
341
+ self._scripts_loaded = [False] * len(self.scripts)
257
342
 
258
343
  # -----------------------------------------------------------------
259
344
  # Internal: lazy R loading
260
345
  # -----------------------------------------------------------------
261
- def _ensure_r_loaded(self):
262
- if self._r is None:
263
- r = _require_rpy2(raise_on_missing=True)
264
- self._r = r
265
- self.ro = r["ro"]
266
- self.robjects = r["robjects"]
267
- self.pandas2ri = r["pandas2ri"]
268
- self.localconverter = r["localconverter"]
269
- self.IntVector = r["IntVector"]
270
- self.FloatVector = r["FloatVector"]
271
- self.BoolVector = r["BoolVector"]
272
- self.StrVector = r["StrVector"]
273
- self.ListVector = r["ListVector"]
274
- self.NamedList = r["NamedList"]
275
-
276
- # Activate renv
277
- if self.path_to_renv and not self._renv_activated:
278
- activate_renv(self.path_to_renv)
279
- self._renv_activated = True
280
-
281
- # Load packages
282
- if self.packages and not self._packages_loaded:
283
- for pkg in self.packages:
284
- try:
285
- self.robjects.r(f'suppressMessages(library("{pkg}"))')
286
- except Exception:
287
- logger.info(f"Package '{pkg}' not found. Installing...")
288
- self.robjects.r(
289
- f'install.packages("{pkg}", repos="https://cloud.r-project.org")'
346
+ def _ensure_r_loaded(self) -> None:
347
+ """
348
+ Ensure R runtime is initialized and all configured R scripts
349
+ are sourced exactly once, in isolated environments.
350
+ """
351
+ if self.robjects is None:
352
+ rpy2_dict = _ensure_rpy2()
353
+ self._RPY2 = rpy2_dict # cache in instance
354
+ self._r = rpy2_dict["ro"]
355
+ self.ro = rpy2_dict["robjects"]
356
+ self.robjects = rpy2_dict["robjects"]
357
+ self.pandas2ri = rpy2_dict["pandas2ri"]
358
+ self.localconverter = rpy2_dict["localconverter"]
359
+ self.IntVector = rpy2_dict["IntVector"]
360
+ self.FloatVector = rpy2_dict["FloatVector"]
361
+ self.BoolVector = rpy2_dict["BoolVector"]
362
+ self.StrVector = rpy2_dict["StrVector"]
363
+ self.ListVector = rpy2_dict["ListVector"]
364
+ self.NamedList = rpy2_dict["NamedList"]
365
+
366
+ r = self.robjects.r
367
+
368
+ # Ensure required R package
369
+ self.ensure_r_package("withr")
370
+
371
+ if not hasattr(self, "_namespaces"):
372
+ self._namespaces: dict[str, dict[str, Any]] = {}
373
+
374
+ # --- Iterate over scripts ---
375
+ for idx, script_entry in enumerate(self.scripts):
376
+ if self._scripts_loaded[idx]:
377
+ continue
378
+
379
+ script_entry = script_entry.resolve()
380
+
381
+ if script_entry.is_file():
382
+ r_files = [script_entry]
383
+ elif script_entry.is_dir():
384
+ r_files = sorted(script_entry.glob("*.R"))
385
+ if not r_files:
386
+ logger.warning(f"No .R files found in directory: {script_entry}")
387
+ self._scripts_loaded[idx] = True
388
+ continue
389
+ else:
390
+ raise ValueError(f"Invalid script path: {script_entry}")
391
+
392
+ for script_path in r_files:
393
+ ns_name = script_path.stem
394
+ logger.info(
395
+ f"Loading R script '{script_path.name}' as namespace '{ns_name}'"
396
+ )
397
+
398
+ r("env <- new.env(parent=globalenv())")
399
+ r(f'script_path <- "{script_path.as_posix()}"')
400
+
401
+ r(
402
+ """
403
+ withr::with_dir(
404
+ dirname(script_path),
405
+ sys.source(basename(script_path), envir=env)
290
406
  )
291
- self.robjects.r(f'suppressMessages(library("{pkg}"))')
292
- self._packages_loaded = True
407
+ """
408
+ )
409
+
410
+ env_obj = r("env")
411
+ self._namespaces[ns_name] = {
412
+ name: env_obj[name]
413
+ for name in env_obj.keys()
414
+ if callable(env_obj[name])
415
+ }
416
+
417
+ logger.info(
418
+ f"Registered {len(self._namespaces[ns_name])} functions in namespace '{ns_name}'"
419
+ )
293
420
 
294
- # Source script
295
- if self.script_path and not self._script_loaded:
296
- self.robjects.r(f'setwd("{self.script_dir.as_posix()}")')
297
- self.robjects.r(f'source("{self.script_path.as_posix()}")')
298
- logger.info(f"R script sourced: {self.script_path.name}")
299
- self._script_loaded = True
421
+ self._scripts_loaded[idx] = True
422
+
423
+ # -----------------------------------------------------------------
424
+ # Autocomplete-friendly attribute access for script namespaces
425
+ # -----------------------------------------------------------------
426
+ def __getattr__(self, name: str):
427
+ if "_namespaces" in self.__dict__ and name in self._namespaces:
428
+ ns_env = self._namespaces[name]
429
+ return NamespaceWrapper(ns_env)
430
+ raise AttributeError(f"'RFunctionCaller' object has no attribute '{name}'")
300
431
 
301
432
  def _clean_scalar(self, x):
302
433
  """
303
434
  Clean R-style missing values to pandas/NumPy equivalents.
304
435
  Called inside _r2py on each vector element; atomic/scalar only.
305
436
  """
306
- r = self._r
307
- ro = r["robjects"]
437
+ robjects = self.robjects
308
438
 
309
439
  if x is None:
310
440
  return None
311
441
 
312
442
  if x in (
313
- getattr(ro, "NA_Real", None),
314
- getattr(ro, "NA_Integer", None),
315
- getattr(ro, "NA_Logical", None),
443
+ getattr(robjects, "NA_Real", None),
444
+ getattr(robjects, "NA_Integer", None),
445
+ getattr(robjects, "NA_Logical", None),
316
446
  ):
317
447
  return None
318
448
 
319
- if x is getattr(ro, "NA_Character", None):
449
+ if x is getattr(robjects, "NA_Character", None):
320
450
  return None
321
451
 
322
452
  if isinstance(x, float) and np.isnan(x):
@@ -340,92 +470,56 @@ class RFunctionCaller:
340
470
  StrVector = self.StrVector
341
471
  ListVector = self.ListVector
342
472
  localconverter = self.localconverter
343
- import pandas as pd
344
- import rpy2.robjects.vectors as rvec
345
-
346
- # Pass through existing R objects
347
- if isinstance(
348
- obj,
349
- (
350
- rvec.IntVector,
351
- rvec.FloatVector,
352
- rvec.BoolVector,
353
- rvec.StrVector,
354
- rvec.ListVector,
355
- robjects.DataFrame,
356
- ),
357
- ):
473
+
474
+ r_types = (
475
+ robjects.vectors.IntVector,
476
+ robjects.vectors.FloatVector,
477
+ robjects.vectors.BoolVector,
478
+ robjects.vectors.StrVector,
479
+ robjects.vectors.ListVector,
480
+ robjects.DataFrame,
481
+ )
482
+ if isinstance(obj, r_types):
358
483
  return obj
359
484
 
485
+ def is_na(x):
486
+ return x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x))
487
+
360
488
  with localconverter(robjects.default_converter + pandas2ri.converter):
361
- if obj is None or obj is pd.NA:
489
+ if is_na(obj):
362
490
  return robjects.NULL
363
-
364
- # DataFrame → data.frame
365
491
  if isinstance(obj, pd.DataFrame):
366
492
  return pandas2ri.py2rpy(obj)
367
-
368
- # Series → vector
369
493
  if isinstance(obj, pd.Series):
370
494
  return self._py2r(obj.tolist())
371
-
372
- # Scalars
373
495
  if isinstance(obj, (int, float, bool, str)):
374
496
  return obj
375
-
376
- # Lists
377
497
  if isinstance(obj, list):
378
498
  if len(obj) == 0:
379
499
  return FloatVector([])
380
- elif all(isinstance(x, (int, float)) or x is None for x in obj):
381
- return FloatVector(
382
- [robjects.NA_Real if x is None else float(x) for x in obj]
383
- )
384
500
 
385
- def is_na(x):
386
- return (
387
- x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x))
388
- )
389
-
390
- # Homogeneous numeric
391
- if all(
392
- isinstance(x, (int, float)) and not isinstance(x, bool) or is_na(x)
393
- for x in obj
394
- ):
501
+ types = set(type(x) for x in obj if not is_na(x))
502
+ if types <= {int, float}:
395
503
  return FloatVector(
396
504
  [robjects.NA_Real if is_na(x) else float(x) for x in obj]
397
505
  )
398
-
399
- # Homogeneous bool
400
- if all(isinstance(x, bool) or is_na(x) for x in obj):
506
+ if types <= {bool}:
401
507
  return BoolVector(
402
508
  [robjects.NA_Logical if is_na(x) else x for x in obj]
403
509
  )
404
-
405
- # Homogeneous str
406
- if all(isinstance(x, str) or is_na(x) for x in obj):
510
+ if types <= {str}:
407
511
  return StrVector(
408
512
  [robjects.NA_Character if is_na(x) else x for x in obj]
409
513
  )
410
-
411
- # Mixed or nested list → ListVector with positional keys
412
514
  return ListVector({str(i): self._py2r(v) for i, v in enumerate(obj)})
413
-
414
- # Dict → NamedList
415
515
  if isinstance(obj, dict):
416
516
  return ListVector({k: self._py2r(v) for k, v in obj.items()})
417
-
418
517
  raise NotImplementedError(f"Cannot convert Python object to R: {type(obj)}")
419
518
 
420
519
  # -----------------------------------------------------------------
421
520
  # R -> Python conversion
422
521
  # -----------------------------------------------------------------
423
522
  def _r2py(self, obj, top_level=True):
424
- """
425
- Convert R objects to Python objects robustly.
426
- Handles DataFrames, NamedList/ListVector, atomic vectors, and NULL.
427
- """
428
- r = self._r
429
523
  robjects = self.robjects
430
524
  NamedList = self.NamedList
431
525
  ListVector = self.ListVector
@@ -433,7 +527,7 @@ class RFunctionCaller:
433
527
  IntVector = self.IntVector
434
528
  FloatVector = self.FloatVector
435
529
  BoolVector = self.BoolVector
436
- NULLType = r["NULLType"]
530
+ NULLType = self._RPY2["NULLType"]
437
531
  lc = self.localconverter
438
532
  pandas2ri = self.pandas2ri
439
533
 
@@ -444,12 +538,10 @@ class RFunctionCaller:
444
538
  with lc(robjects.default_converter + pandas2ri.converter):
445
539
  df = robjects.conversion.rpy2py(obj)
446
540
  df = postprocess_r_dataframe(df)
447
- df = clean_r_missing(df, caller=self)
448
- return df
541
+ return clean_r_missing(df, caller=self)
449
542
 
450
543
  if isinstance(obj, (NamedList, ListVector)):
451
544
  py_obj = r_namedlist_to_dict(obj, caller=self, top_level=top_level)
452
- # Auto-unpack single-element lists only at top-level
453
545
  if isinstance(py_obj, list) and len(py_obj) == 1 and top_level:
454
546
  return py_obj[0]
455
547
  return py_obj
@@ -465,58 +557,79 @@ class RFunctionCaller:
465
557
  # -----------------------------------------------------------------
466
558
  # Public: ensure R package is available
467
559
  # -----------------------------------------------------------------
468
- def ensure_r_package(self, pkg_name: str):
560
+ def ensure_r_package(self, pkg: str):
469
561
  r = self.robjects.r
470
562
  try:
471
- r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
563
+ r(f'suppressMessages(library("{pkg}", character.only=TRUE))')
472
564
  except Exception:
473
- r(f'install.packages("{pkg_name}", repos="https://cloud.r-project.org")')
474
- r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
565
+ logger.info(f"Package '{pkg}' not found.")
566
+ logger.warning(f"Installing missing R package: {pkg}")
567
+ r(f'install.packages("{pkg}", repos="https://cloud.r-project.org")')
568
+ r(f'suppressMessages(library("{pkg}", character.only=TRUE))')
475
569
 
476
570
  # -----------------------------------------------------------------
477
571
  # Public: call an R function
478
572
  # -----------------------------------------------------------------
479
573
  def call(self, func_name: str, *args, **kwargs):
480
- """
481
- Call an R function safely. Supports:
482
- - functions defined in scripts
483
- - base R functions
484
- - functions in loaded packages
485
- """
486
574
  self._ensure_r_loaded()
487
575
 
488
- # --- Find the function ---
489
576
  func = None
490
- try:
491
- func = self.robjects.globalenv[func_name] # script-defined
492
- except KeyError:
493
- try:
494
- func = self.robjects.r[func_name] # base or package function
495
- except KeyError:
496
- # --- Added: handle namespaced functions like stats::median ---
497
- if "::" in func_name:
498
- pkg, fname = func_name.split("::", 1)
499
- try:
500
- func = self.robjects.r(f"{pkg}::{fname}")
501
- except Exception as e:
502
- raise RuntimeError(
503
- f"Failed to load R function '{func_name}' via namespace: {e}"
504
- ) from e
505
-
506
- if func is None:
507
- raise ValueError(f"R function '{func_name}' not found.")
508
-
509
- # --- Convert Python args to R ---
577
+ source_info = None
578
+
579
+ if "::" in func_name:
580
+ ns_name, fname = func_name.split("::", 1)
581
+ if ns_name in self._namespaces:
582
+ ns_env = self._namespaces[ns_name]
583
+ if fname in ns_env:
584
+ func = ns_env[fname]
585
+ source_info = f"script namespace '{ns_name}'"
586
+ else:
587
+ raise ValueError(
588
+ f"Function '{fname}' not found in R script namespace '{ns_name}'"
589
+ )
590
+ else:
591
+ try:
592
+ func = self.robjects.r(f"{ns_name}::{fname}")
593
+ source_info = f"R package '{ns_name}'"
594
+ except Exception as e:
595
+ raise RuntimeError(
596
+ f"Failed to resolve R function '{func_name}': {e}"
597
+ ) from e
598
+
599
+ else:
600
+ for ns_name, ns_env in self._namespaces.items():
601
+ if func_name in ns_env:
602
+ func = ns_env[func_name]
603
+ source_info = f"script namespace '{ns_name}'"
604
+ break
605
+
606
+ if func is None:
607
+ try:
608
+ func = self.robjects.globalenv[func_name]
609
+ source_info = "global environment"
610
+ except KeyError:
611
+ pass
612
+
613
+ if func is None:
614
+ try:
615
+ func = self.robjects.r[func_name]
616
+ source_info = "base R / loaded package"
617
+ except KeyError:
618
+ raise ValueError(
619
+ f"R function '{func_name}' not found in any namespace, global env, or base R."
620
+ )
621
+
510
622
  r_args = [self._py2r(a) for a in args]
511
623
  r_kwargs = {k: self._py2r(v) for k, v in kwargs.items()}
512
624
 
513
- # --- Call safely ---
514
625
  try:
515
626
  result = func(*r_args, **r_kwargs)
516
627
  except Exception as e:
517
- raise RuntimeError(f"Error calling R function '{func_name}': {e}")
628
+ raise RuntimeError(
629
+ f"Error calling R function '{func_name}' from {source_info}: {e}"
630
+ ) from e
518
631
 
519
- # --- Convert R result back to Python ---
632
+ logger.info(f"Called R function '{func_name}' from {source_info}")
520
633
  return self._r2py(result)
521
634
 
522
635
 
@@ -525,10 +638,6 @@ class RFunctionCaller:
525
638
  # Utility functions for R ↔ Python
526
639
  # ------------------------------
527
640
  def r_namedlist_to_dict(namedlist, caller: RFunctionCaller, top_level=False):
528
- """
529
- Recursively convert an R NamedList or ListVector to a Python dictionary.
530
- Uses the caller._r2py method for nested conversions.
531
- """
532
641
  r = _ensure_rpy2()
533
642
  NamedList = r["NamedList"]
534
643
  ListVector = r["ListVector"]
@@ -536,31 +645,24 @@ def r_namedlist_to_dict(namedlist, caller: RFunctionCaller, top_level=False):
536
645
  if isinstance(namedlist, (NamedList, ListVector)):
537
646
  names = namedlist.names if not callable(namedlist.names) else namedlist.names()
538
647
 
539
- # Detect positional (unnamed) list
540
648
  if names and all(str(i) == str(name) for i, name in enumerate(names)):
541
649
  out = []
542
650
  for v in namedlist:
543
- # Nested elements are never top-level
544
651
  val = caller._r2py(v, top_level=False)
545
652
  out.append(val)
546
653
  return out
547
654
 
548
- # Otherwise dict
549
655
  result = {}
550
656
  for i, val in enumerate(namedlist):
551
657
  key = names[i] if names and i < len(names) else str(i)
552
- v_py = caller._r2py(val, top_level=False) # nested elements
658
+ v_py = caller._r2py(val, top_level=False)
553
659
  result[str(key)] = v_py
554
660
  return result
555
661
 
556
- # Fallback: scalar/vector at the very top
557
662
  return caller._r2py(namedlist, top_level=top_level)
558
663
 
559
664
 
560
665
  def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
561
- """
562
- Clean an R data.frame by removing non-structural attributes like .groups and .rows.
563
- """
564
666
  for attr in [".groups", ".rows"]:
565
667
  try:
566
668
  del r_df.attrs[attr]
@@ -570,18 +672,11 @@ def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
570
672
 
571
673
 
572
674
  def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
573
- """
574
- Replace string NAs or empty strings with pd.NA.
575
- """
576
675
  return df.replace(["nan", "NaN", "NA", "na", ""], pd.NA)
577
676
 
578
677
 
579
678
  def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
580
- """
581
- Normalize dtypes in a single DataFrame after R conversion.
582
- """
583
679
  df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)
584
-
585
680
  for col in df.columns:
586
681
  series = df[col]
587
682
  if pd.api.types.is_object_dtype(series):
@@ -594,18 +689,10 @@ def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
594
689
 
595
690
 
596
691
  def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
597
- """
598
- Post-process R DataFrame:
599
- - Convert R NA_integer_ sentinel (-2147483648) to pd.NA
600
- - Convert R-style numeric dates to datetime
601
- - Remove timezone from datetime columns
602
- """
603
692
  for col in df.columns:
604
693
  series = df[col]
605
-
606
694
  if pd.api.types.is_integer_dtype(series):
607
695
  df[col] = series.mask(series == -2147483648, pd.NA)
608
-
609
696
  if pd.api.types.is_numeric_dtype(series):
610
697
  values = series.dropna()
611
698
  if not values.empty and values.between(10000, 40000).all():
@@ -615,24 +702,15 @@ def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
615
702
  )
616
703
  except Exception:
617
704
  pass
618
-
619
705
  if pd.api.types.is_datetime64tz_dtype(series):
620
706
  df[col] = series.dt.tz_localize(None)
621
-
622
707
  return df
623
708
 
624
709
 
625
710
  def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
626
- """
627
- Apply a series of fixes to a DataFrame converted from R:
628
- - Type corrections
629
- - String NA normalization
630
- - Index normalization
631
- """
632
711
  df = fix_r_dataframe_types(df)
633
712
  df = fix_string_nans(df)
634
713
  df = normalize_single_df_dtypes(df)
635
-
636
714
  if df.index.dtype == object:
637
715
  try:
638
716
  int_index = df.index.astype(int)
@@ -644,62 +722,37 @@ def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
644
722
 
645
723
 
646
724
  def clean_r_missing(obj, caller: RFunctionCaller):
647
- """
648
- Post-process R return objects for downstream Python use.
649
- Recursively convert R-style missing values to pandas/NumPy:
650
- - NA_integer_, NA_real_, NA_logical_ → np.nan
651
- - NA_character_ → pd.NA
652
- """
653
- r = _ensure_rpy2()
654
- ro = r["robjects"]
655
-
725
+ robjects = caller.robjects
656
726
  NA_MAP = {
657
- getattr(ro, "NA_Real", None): np.nan,
658
- getattr(ro, "NA_Integer", None): np.nan,
659
- getattr(ro, "NA_Logical", None): np.nan,
660
- getattr(ro, "NA_Character", None): pd.NA,
727
+ getattr(robjects, "NA_Real", None): np.nan,
728
+ getattr(robjects, "NA_Integer", None): np.nan,
729
+ getattr(robjects, "NA_Logical", None): np.nan,
730
+ getattr(robjects, "NA_Character", None): pd.NA,
661
731
  }
662
732
 
663
733
  if isinstance(obj, pd.DataFrame):
664
734
  for col in obj.columns:
665
735
  obj[col] = obj[col].apply(lambda x: clean_r_missing(x, caller))
666
736
  return obj
667
-
668
737
  elif isinstance(obj, dict):
669
738
  return {k: clean_r_missing(v, caller) for k, v in obj.items()}
670
-
671
739
  elif isinstance(obj, list):
672
740
  return [clean_r_missing(v, caller) for v in obj]
673
-
674
741
  else:
675
742
  return NA_MAP.get(obj, obj)
676
743
 
677
744
 
678
- # %%
679
- # -------------------------------------------
680
- # Functions here onwards are utility functions
681
- # for comparing R and Python DataFrames.
682
- # -------------------------------------------
683
-
684
-
745
+ # ---------------------------------------------------------------------
746
+ # DataFrame comparison utilities
747
+ # ---------------------------------------------------------------------
685
748
  def normalize_dtypes(
686
749
  df1: pd.DataFrame, df2: pd.DataFrame
687
750
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
688
- """
689
- Aligns column dtypes across two DataFrames for accurate comparison.
690
- - Replaces empty strings with pd.NA.
691
- - Attempts to coerce strings to numeric where applicable.
692
- - Aligns dtypes between matching columns (e.g. float64 vs int64).
693
- """
694
751
  for col in df1.columns.intersection(df2.columns):
695
- # Replace empty strings with NA
696
752
  df1[col] = df1[col].replace("", pd.NA)
697
753
  df2[col] = df2[col].replace("", pd.NA)
698
-
699
754
  s1, s2 = df1[col], df2[col]
700
755
  dtype1, dtype2 = s1.dtype, s2.dtype
701
-
702
- # If one is numeric and the other is object, try coercing both to numeric
703
756
  if (
704
757
  pd.api.types.is_numeric_dtype(dtype1)
705
758
  and pd.api.types.is_object_dtype(dtype2)
@@ -710,98 +763,57 @@ def normalize_dtypes(
710
763
  try:
711
764
  df1[col] = pd.to_numeric(s1, errors="coerce")
712
765
  df2[col] = pd.to_numeric(s2, errors="coerce")
713
- continue # skip to next column if coercion succeeds
766
+ continue
714
767
  except Exception:
715
- pass # fallback to next block if coercion fails
716
-
717
- # If both are numeric but of different types (e.g., int vs float), unify to float64
768
+ pass
718
769
  if pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_numeric_dtype(
719
770
  dtype2
720
771
  ):
721
772
  df1[col] = df1[col].astype("float64")
722
773
  df2[col] = df2[col].astype("float64")
723
774
  continue
724
-
725
- # If both are objects or strings, convert both to str for equality comparison
726
775
  if pd.api.types.is_object_dtype(dtype1) or pd.api.types.is_object_dtype(dtype2):
727
776
  df1[col] = df1[col].astype(str)
728
777
  df2[col] = df2[col].astype(str)
729
-
730
778
  return df1, df2
731
779
 
732
780
 
733
- # %%
734
781
  def align_numeric_dtypes(
735
782
  df1: pd.DataFrame, df2: pd.DataFrame
736
783
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
737
- """
738
- Ensure aligned numeric dtypes between two DataFrames for accurate comparison.
739
- Converts between int, float, and numeric-looking strings where appropriate.
740
- Also handles NA and empty string normalization.
741
- """
742
784
  for col in df1.columns.intersection(df2.columns):
743
- s1, s2 = df1[col], df2[col]
744
-
745
- # Replace empty strings with NA to avoid type promotion issues
746
- s1 = s1.replace("", pd.NA)
747
- s2 = s2.replace("", pd.NA)
748
-
749
- # Try to coerce both to numeric (non-destructive)
785
+ s1, s2 = df1[col].replace("", pd.NA), df2[col].replace("", pd.NA)
750
786
  try:
751
787
  s1_num = pd.to_numeric(s1, errors="coerce")
752
788
  s2_num = pd.to_numeric(s2, errors="coerce")
753
-
754
- # If at least one successfully converts and it's not all NaN
755
789
  if not s1_num.isna().all() or not s2_num.isna().all():
756
790
  df1[col] = s1_num.astype("float64")
757
791
  df2[col] = s2_num.astype("float64")
758
- continue # move to next column
792
+ continue
759
793
  except Exception:
760
794
  pass
761
-
762
- # Otherwise, fall back to original values
763
- df1[col] = s1
764
- df2[col] = s2
765
-
795
+ df1[col], df2[col] = s1, s2
766
796
  return df1, df2
767
797
 
768
798
 
769
- # %%
770
799
  def compare_r_py_dataframes(
771
800
  df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8
772
801
  ) -> dict:
773
- """
774
- Compare a Python DataFrame (df1) with an R DataFrame converted to pandas (df2).
775
-
776
- Returns:
777
- dict with mismatch diagnostics, preserving original indices in diffs.
778
- """
779
-
780
802
  results: dict[str, Any] = {
781
803
  "shape_mismatch": False,
782
804
  "columns_mismatch": False,
783
805
  "index_mismatch": False,
784
- "numeric_diffs": {}, # type: dict[str, pd.DataFrame]
785
- "non_numeric_diffs": {}, # type: dict[str, pd.DataFrame]
806
+ "numeric_diffs": {},
807
+ "non_numeric_diffs": {},
786
808
  }
787
-
788
- # --- Preprocessing: fix R-specific issues ---
789
809
  df2 = fix_r_dataframe_types(df2)
790
-
791
- # --- Replace common string NAs with proper pd.NA ---
792
810
  df1 = fix_string_nans(df1)
793
811
  df2 = fix_string_nans(df2)
794
-
795
- # --- Normalize and align dtypes ---
796
812
  df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
797
813
  df1, df2 = align_numeric_dtypes(df1, df2)
798
-
799
- # --- Check shape ---
800
814
  if df1.shape != df2.shape:
801
815
  results["shape_mismatch"] = True
802
816
  print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")
803
-
804
- # --- Check columns ---
805
817
  if set(df1.columns) != set(df2.columns):
806
818
  results["columns_mismatch"] = True
807
819
  print("[Warning] Column mismatch:")
@@ -810,21 +822,13 @@ def compare_r_py_dataframes(
810
822
  common_cols = df1.columns.intersection(df2.columns)
811
823
  else:
812
824
  common_cols = df1.columns
813
-
814
- # --- Ensure columns are the same order ---
815
- df1_aligned = df1.loc[:, common_cols]
816
- df2_aligned = df2.loc[:, common_cols]
817
-
818
- # --- Compare values column by column ---
825
+ df1_aligned, df2_aligned = df1.loc[:, common_cols], df2.loc[:, common_cols]
819
826
  for col in common_cols:
820
- col_py = df1_aligned[col]
821
- col_r = df2_aligned[col]
822
-
827
+ col_py, col_r = df1_aligned[col], df2_aligned[col]
823
828
  if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(
824
829
  col_r
825
830
  ):
826
831
  col_py, col_r = col_py.align(col_r)
827
-
828
832
  close = np.isclose(
829
833
  col_py.fillna(np.nan),
830
834
  col_r.fillna(np.nan),
@@ -832,30 +836,15 @@ def compare_r_py_dataframes(
832
836
  equal_nan=True,
833
837
  )
834
838
  if not close.all():
835
- diffs = pd.DataFrame(
836
- {
837
- "df1": col_py[~close],
838
- "df2": col_r[~close],
839
- }
839
+ results["numeric_diffs"][col] = pd.DataFrame(
840
+ {"df1": col_py[~close], "df2": col_r[~close]}
840
841
  )
841
- results["numeric_diffs"][col] = diffs
842
-
843
842
  else:
844
- # Treat missing values as equal: create mask where values differ excluding matching NAs
845
843
  unequal = ~col_py.eq(col_r)
846
844
  both_na = col_py.isna() & col_r.isna()
847
845
  unequal = unequal & ~both_na
848
-
849
846
  if unequal.any():
850
- diffs = pd.DataFrame(
851
- {
852
- "df1": col_py[unequal],
853
- "df2": col_r[unequal],
854
- }
847
+ results["non_numeric_diffs"][col] = pd.DataFrame(
848
+ {"df1": col_py[unequal], "df2": col_r[unequal]}
855
849
  )
856
- results["non_numeric_diffs"][col] = diffs
857
-
858
850
  return results
859
-
860
-
861
- # %%
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rpy-bridge
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Python-to-R interoperability engine with environment management, type-safe conversions, data normalization, and safe R function execution.
5
5
  Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
6
  License: MIT License
@@ -31,12 +31,17 @@ License: MIT License
31
31
 
32
32
  Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
33
33
  Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
34
+ Keywords: python,r,rpy2,python-r,interoperability,data-science,statistics,bioinformatics
34
35
  Classifier: License :: OSI Approved :: MIT License
35
36
  Classifier: Programming Language :: Python
36
37
  Classifier: Programming Language :: Python :: 3
37
38
  Classifier: Programming Language :: Python :: 3.11
38
39
  Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Intended Audience :: Developers
41
+ Classifier: Intended Audience :: Science/Research
39
42
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
43
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
44
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
45
  Requires-Python: >=3.11
41
46
  Description-Content-Type: text/markdown
42
47
  License-File: LICENSE
File without changes
File without changes
File without changes
File without changes