PyPI - rpy-bridge - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

rpy-bridge 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

rpy_bridge/__init__.py +2 -2
rpy_bridge/rpy2_utils.py +521 -257
rpy_bridge-0.3.0.dist-info/METADATA +251 -0
rpy_bridge-0.3.0.dist-info/RECORD +8 -0
rpy_bridge-0.1.0.dist-info/METADATA +0 -205
rpy_bridge-0.1.0.dist-info/RECORD +0 -8
{rpy_bridge-0.1.0.dist-info → rpy_bridge-0.3.0.dist-info}/WHEEL +0 -0
{rpy_bridge-0.1.0.dist-info → rpy_bridge-0.3.0.dist-info}/licenses/LICENSE +0 -0
{rpy_bridge-0.1.0.dist-info → rpy_bridge-0.3.0.dist-info}/top_level.txt +0 -0

rpy_bridge/rpy2_utils.py CHANGED

@@ -16,23 +16,12 @@ import warnings
 warnings.filterwarnings("ignore", message="Environment variable .* redefined by R")
 from pathlib import Path
+import sys
+import subprocess
+import math
 import numpy as np
 import pandas as pd
-import rpy2.robjects as ro
-from rpy2 import robjects
-from rpy2.rinterface_lib.sexp import NULLType
-from rpy2.rlike.container import NamedList
-from rpy2.robjects import pandas2ri
-from rpy2.robjects.conversion import localconverter
-from rpy2.robjects.vectors import (
-    BoolVector,
-    FloatVector,
-    IntVector,
-    ListVector,
-    StrVector,
-)
-from typing import Optional
 try:
     from loguru import logger  # type: ignore
@@ -43,358 +32,619 @@ except Exception:
     logger = logging.getLogger("rpy-bridge")
-# %%
-def activate_renv(path_to_renv: Path) -> None:
-    """
-    Activates the renv environment using renv::load() to ensure the correct project is loaded.
-    This avoids sourcing activate.R directly and avoids accidentally initializing a new environment.
+# ---------------------------------------------------------------------
+# R detection and rpy2 installation
+# ---------------------------------------------------------------------
+def ensure_rpy2_installed(r_home: str):
+    os.environ["R_HOME"] = r_home
+    try:
+        import rpy2  # noqa: F401
+    except ImportError:
+        logger.info(
+            f"[Info] rpy2 not installed or incompatible with R_HOME={r_home}. Installing..."
+        )
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install", "--force-reinstall", "rpy2"]
+        )
+        import rpy2  # noqa: F401
-    Accepts either:
-    - Direct path to renv directory (e.g., /path/to/renv)
-    - Parent directory containing renv/ folder (e.g., /path/to/repos where renv/ is inside)
-    """
-    path_to_renv = path_to_renv.resolve()
+def find_r_home():
+    try:
+        r_home = subprocess.check_output(
+            ["R", "--vanilla", "--slave", "-e", "cat(R.home())"],
+            stderr=subprocess.PIPE,
+            text=True,
+        ).strip()
+        if r_home.endswith(">"):
+            r_home = r_home[:-1].strip()
+        return r_home
+    except FileNotFoundError:
+        possible_paths = [
+            "/usr/lib/R",
+            "/usr/local/lib/R",
+            "/opt/homebrew/Cellar/r/4.5.2/lib/R",  # Homebrew macOS
+            "C:\\Program Files\\R\\R-4.5.2",  # Windows
+        ]
+        for p in possible_paths:
+            if os.path.exists(p):
+                return p
+        return None
+R_HOME = find_r_home()
+if not R_HOME:
+    raise RuntimeError("R not found. Please install R or add it to PATH.")
+logger.info(f"R_HOME = {R_HOME}")
+os.environ["R_HOME"] = R_HOME
+ensure_rpy2_installed(R_HOME)
+# macOS dynamic library path
+if sys.platform == "darwin":
+    lib_path = os.path.join(R_HOME, "lib")
+    if lib_path not in os.environ.get("DYLD_FALLBACK_LIBRARY_PATH", ""):
+        os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = (
+            f"{lib_path}:{os.environ.get('DYLD_FALLBACK_LIBRARY_PATH','')}"
+        )
+elif sys.platform.startswith("linux"):
+    lib_path = os.path.join(R_HOME, "lib")
+    ld_path = os.environ.get("LD_LIBRARY_PATH", "")
+    os.environ["LD_LIBRARY_PATH"] = f"{lib_path}:{ld_path}"
+# ---------------------------------------------------------------------
+# Lazy rpy2 import machinery
+# ---------------------------------------------------------------------
+_RPY2: dict | None = None
-    # Determine if path_to_renv is the renv directory itself or its parent
+def _require_rpy2(raise_on_missing: bool = True) -> dict | None:
+    global _RPY2
+    if _RPY2 is not None:
+        return _RPY2
+    try:
+        import rpy2.robjects as ro
+        from rpy2 import robjects
+        from rpy2.robjects import pandas2ri
+        from rpy2.robjects.conversion import localconverter
+        from rpy2.robjects.vectors import (
+            BoolVector,
+            FloatVector,
+            IntVector,
+            ListVector,
+            StrVector,
+        )
+        from rpy2.rinterface_lib.sexp import NULLType
+        from rpy2.rlike.container import NamedList
+        _RPY2 = {
+            "ro": ro,
+            "robjects": robjects,
+            "pandas2ri": pandas2ri,
+            "localconverter": localconverter,
+            "BoolVector": BoolVector,
+            "FloatVector": FloatVector,
+            "IntVector": IntVector,
+            "ListVector": ListVector,
+            "StrVector": StrVector,
+            "NULLType": NULLType,
+            "NamedList": NamedList,
+        }
+        return _RPY2
+    except ImportError as e:
+        if raise_on_missing:
+            raise RuntimeError(
+                "R support requires optional dependency `rpy2`. Install with: pip install rpy-bridge[r]"
+            ) from e
+        return None
+def _ensure_rpy2() -> dict:
+    global _RPY2
+    if _RPY2 is None:
+        _RPY2 = _require_rpy2()
+    return _RPY2
+# ---------------------------------------------------------------------
+# Activate renv
+# ---------------------------------------------------------------------
+def activate_renv(path_to_renv: Path) -> None:
+    r = _ensure_rpy2()
+    robjects = r["robjects"]
+    path_to_renv = path_to_renv.resolve()
     if path_to_renv.name == "renv" and (path_to_renv / "activate.R").exists():
-        # Path points directly to renv directory
         renv_dir = path_to_renv
-        renv_project_dir = path_to_renv.parent
+        project_dir = path_to_renv.parent
     else:
-        # Path points to parent directory containing renv/
         renv_dir = path_to_renv / "renv"
-        renv_project_dir = path_to_renv
+        project_dir = path_to_renv
     renv_activate = renv_dir / "activate.R"
-    renv_lock = renv_project_dir / "renv.lock"
+    renv_lock = project_dir / "renv.lock"
     if not renv_activate.exists() or not renv_lock.exists():
-        raise FileNotFoundError(
-            f"[Error] renv environment not found or incomplete.\n"
-            f"  Expected activate.R at: {renv_activate}\n"
-            f"  Expected renv.lock at: {renv_lock}\n"
-            f"  Provided path: {path_to_renv}"
-        )
+        raise FileNotFoundError(f"[Error] renv environment incomplete: {path_to_renv}")
-    # Optional: set R_ENVIRON_USER if .Renviron exists
-    renviron_file = renv_project_dir / ".Renviron"
+    renviron_file = project_dir / ".Renviron"
     if renviron_file.is_file():
         os.environ["R_ENVIRON_USER"] = str(renviron_file)
-        logger.info("R_ENVIRON_USER set to: {}", renviron_file)
+        logger.info(f"R_ENVIRON_USER set to: {renviron_file}")
+    rprofile_file = project_dir / ".Rprofile"
+    if rprofile_file.is_file():
+        robjects.r(f'source("{rprofile_file.as_posix()}")')
+        logger.info(f".Rprofile sourced: {rprofile_file}")
-    # Load the renv package
     try:
-        robjects.r("library(renv)")
+        robjects.r("suppressMessages(library(renv))")
     except Exception:
-        print("[Info] renv package not found in R. Attempting to install...")
-        robjects.r('install.packages("renv", repos="https://cloud.r-project.org")')
-        # Try loading again after installation
+        logger.info("Installing renv package in project library...")
+        robjects.r(
+            f'install.packages("renv", repos="https://cloud.r-project.org", lib="{renv_dir / "library"}")'
+        )
         robjects.r("library(renv)")
-    # Load the renv environment using renv::load(path)
-    try:
-        logger.info("Using R at: {}", robjects.r("R.home()")[0])
-        robjects.r(f'renv::load("{renv_project_dir.as_posix()}")')
-        logger.info("renv environment loaded for project: {}", renv_project_dir)
-    except Exception as e:
-        raise RuntimeError(f"[Error] Failed to load renv environment: {e}")
-    logger.debug(".libPaths(): {}", robjects.r(".libPaths()"))
+    robjects.r(f'renv::load("{project_dir.as_posix()}")')
+    logger.info(f"renv environment loaded for project: {project_dir}")
-# %%
+# ---------------------------------------------------------------------
+# RFunctionCaller
+# ---------------------------------------------------------------------
 class RFunctionCaller:
     """
-    A utility class to load and execute R functions from a specified R script using rpy2.
-    """
+    Utility to load and call R functions from a script, lazily loading rpy2 and activating renv.
-    def __init__(self, path_to_renv: Path | None, script_path: Path):
-        """
-        Initialize the RFunctionCaller with the path to the renv environment and the R script.
-        Set path_to_renv to None if no renv is used.
-        """
-        if not script_path.exists():
-            raise FileNotFoundError(f"R script not found: {script_path}")
+    Supports:
+    - Scripts with custom functions
+    - Base R functions
+    - Functions in installed packages
+    - Automatic conversion of Python types (lists, dicts, scalars, pandas DataFrames) to R objects
+    """
+    def __init__(
+        self,
+        path_to_renv: Path | None = None,
+        script_path: Path | None = None,
+        packages: list[str] | None = None,
+    ):
         self.path_to_renv = path_to_renv.resolve() if path_to_renv else None
+        self.script_path = script_path.resolve() if script_path else None
+        self.packages = packages or None
+        # Lazy-loaded attributes
+        self._r = None
+        self.ro = None
+        self.robjects = None
+        self.pandas2ri = None
+        self.localconverter = None
+        self.IntVector = None
+        self.FloatVector = None
+        self.BoolVector = None
+        self.StrVector = None
+        self.ListVector = None
+        self.NamedList = None
+        if self.script_path and not self.script_path.exists():
+            raise FileNotFoundError(f"R script not found: {self.script_path}")
+        self.script_dir = self.script_path.parent if self.script_path else None
+        self._script_loaded = False
+        self._renv_activated = False
+        self._packages_loaded = False
+    # -----------------------------------------------------------------
+    # Internal: lazy R loading
+    # -----------------------------------------------------------------
+    def _ensure_r_loaded(self):
+        if self._r is None:
+            r = _require_rpy2(raise_on_missing=True)
+            self._r = r
+            self.ro = r["ro"]
+            self.robjects = r["robjects"]
+            self.pandas2ri = r["pandas2ri"]
+            self.localconverter = r["localconverter"]
+            self.IntVector = r["IntVector"]
+            self.FloatVector = r["FloatVector"]
+            self.BoolVector = r["BoolVector"]
+            self.StrVector = r["StrVector"]
+            self.ListVector = r["ListVector"]
+            self.NamedList = r["NamedList"]
+        # Activate renv
+        if self.path_to_renv and not self._renv_activated:
+            activate_renv(self.path_to_renv)
+            self._renv_activated = True
-        self.script_path = script_path.resolve()
-        self.script_dir = self.script_path.parent
-        self._load_script()
-    def _load_script(self):
+        # Load packages
+        if self.packages and not self._packages_loaded:
+            for pkg in self.packages:
+                try:
+                    self.robjects.r(f'suppressMessages(library("{pkg}"))')
+                except Exception:
+                    logger.info(f"Package '{pkg}' not found. Installing...")
+                    self.robjects.r(
+                        f'install.packages("{pkg}", repos="https://cloud.r-project.org")'
+                    )
+                    self.robjects.r(f'suppressMessages(library("{pkg}"))')
+            self._packages_loaded = True
+        # Source script
+        if self.script_path and not self._script_loaded:
+            self.robjects.r(f'setwd("{self.script_dir.as_posix()}")')
+            self.robjects.r(f'source("{self.script_path.as_posix()}")')
+            logger.info(f"R script sourced: {self.script_path.name}")
+            self._script_loaded = True
+    def _clean_scalar(self, x):
         """
-        Set the R working directory and source the R script.
+        Clean R-style missing values to pandas/NumPy equivalents.
+        Called inside _r2py on each vector element; atomic/scalar only.
         """
-        if self.path_to_renv:
-            activate_renv(self.path_to_renv)
-        else:
-            logger.info("No renv path provided; using base or current environment.")
+        r = self._r
+        ro = r["robjects"]
-        # Set the working directory to the script's directory
-        robjects.r(f'setwd("{self.script_dir.as_posix()}")')
-        robjects.r(f'source("{self.script_path.as_posix()}")')
-        logger.info("R script sourced: {}", self.script_path.name)
+        if x is None:
+            return None
-    def call(self, function_name: str, *args: object, **kwargs: object) -> object:
-        """
-        Call an R function from the sourced script, and recursively convert &
-        post-process the result.
+        if x in (
+            getattr(ro, "NA_Real", None),
+            getattr(ro, "NA_Integer", None),
+            getattr(ro, "NA_Logical", None),
+        ):
+            return None
-        Handles:
-        - Direct data.frame
-        - NamedList or ListVector
-        - Nested lists with data.frames inside
-        """
+        if x is getattr(ro, "NA_Character", None):
+            return None
-        def _recursive_postprocess(obj):
-            # Handle single DataFrame
-            if isinstance(obj, pd.DataFrame):
-                return postprocess_r_dataframe(obj)
+        if isinstance(x, float) and np.isnan(x):
+            return None
-            # Handle dictionary (e.g. NamedList converted)
-            elif isinstance(obj, dict):
-                return {k: _recursive_postprocess(v) for k, v in obj.items()}
+        return x
-            # Handle list of items
-            elif isinstance(obj, list):
-                return [_recursive_postprocess(item) for item in obj]
+    # -----------------------------------------------------------------
+    # Python -> R conversion
+    # -----------------------------------------------------------------
+    def _py2r(self, obj):
+        """
+        Convert Python objects to R objects robustly.
+        Handles scalars, None/pd.NA, lists, dicts, and pandas DataFrames.
+        """
+        self._ensure_r_loaded()
+        robjects = self.robjects
+        pandas2ri = self.pandas2ri
+        IntVector = self.IntVector
+        FloatVector = self.FloatVector
+        BoolVector = self.BoolVector
+        StrVector = self.StrVector
+        ListVector = self.ListVector
+        localconverter = self.localconverter
+        import pandas as pd
+        import rpy2.robjects.vectors as rvec
+        # Pass through existing R objects
+        if isinstance(
+            obj,
+            (
+                rvec.IntVector,
+                rvec.FloatVector,
+                rvec.BoolVector,
+                rvec.StrVector,
+                rvec.ListVector,
+                robjects.DataFrame,
+            ),
+        ):
+            return obj
-            return obj  # Primitive values stay as-is
+        with localconverter(robjects.default_converter + pandas2ri.converter):
+            if obj is None or obj is pd.NA:
+                return robjects.NULL
+            # DataFrame → data.frame
+            if isinstance(obj, pd.DataFrame):
+                return pandas2ri.py2rpy(obj)
+            # Series → vector
+            if isinstance(obj, pd.Series):
+                return self._py2r(obj.tolist())
+            # Scalars
+            if isinstance(obj, (int, float, bool, str)):
+                return obj
+            # Lists
+            if isinstance(obj, list):
+                if len(obj) == 0:
+                    return FloatVector([])
+                elif all(isinstance(x, (int, float)) or x is None for x in obj):
+                    return FloatVector(
+                        [robjects.NA_Real if x is None else float(x) for x in obj]
+                    )
+                def is_na(x):
+                    return (
+                        x is None or x is pd.NA or (isinstance(x, float) and pd.isna(x))
+                    )
+                # Homogeneous numeric
+                if all(
+                    isinstance(x, (int, float)) and not isinstance(x, bool) or is_na(x)
+                    for x in obj
+                ):
+                    return FloatVector(
+                        [robjects.NA_Real if is_na(x) else float(x) for x in obj]
+                    )
+                # Homogeneous bool
+                if all(isinstance(x, bool) or is_na(x) for x in obj):
+                    return BoolVector(
+                        [robjects.NA_Logical if is_na(x) else x for x in obj]
+                    )
+                # Homogeneous str
+                if all(isinstance(x, str) or is_na(x) for x in obj):
+                    return StrVector(
+                        [robjects.NA_Character if is_na(x) else x for x in obj]
+                    )
+                # Mixed or nested list → ListVector with positional keys
+                return ListVector({str(i): self._py2r(v) for i, v in enumerate(obj)})
+            # Dict → NamedList
+            if isinstance(obj, dict):
+                return ListVector({k: self._py2r(v) for k, v in obj.items()})
+            raise NotImplementedError(f"Cannot convert Python object to R: {type(obj)}")
+    # -----------------------------------------------------------------
+    # R -> Python conversion
+    # -----------------------------------------------------------------
+    def _r2py(self, obj, top_level=True):
+        """
+        Convert R objects to Python objects robustly.
+        Handles DataFrames, NamedList/ListVector, atomic vectors, and NULL.
+        """
+        r = self._r
+        robjects = self.robjects
+        NamedList = self.NamedList
+        ListVector = self.ListVector
+        StrVector = self.StrVector
+        IntVector = self.IntVector
+        FloatVector = self.FloatVector
+        BoolVector = self.BoolVector
+        NULLType = r["NULLType"]
+        lc = self.localconverter
+        pandas2ri = self.pandas2ri
+        if isinstance(obj, NULLType):
+            return None
+        if isinstance(obj, robjects.DataFrame):
+            with lc(robjects.default_converter + pandas2ri.converter):
+                df = robjects.conversion.rpy2py(obj)
+            df = postprocess_r_dataframe(df)
+            df = clean_r_missing(df, caller=self)
+            return df
+        if isinstance(obj, (NamedList, ListVector)):
+            py_obj = r_namedlist_to_dict(obj, caller=self, top_level=top_level)
+            # Auto-unpack single-element lists only at top-level
+            if isinstance(py_obj, list) and len(py_obj) == 1 and top_level:
+                return py_obj[0]
+            return py_obj
+        if isinstance(obj, (StrVector, IntVector, FloatVector, BoolVector)):
+            py_list = [self._clean_scalar(v) for v in obj]
+            if len(py_list) == 1 and top_level:
+                return py_list[0]
+            return py_list
+        return self._clean_scalar(obj)
+    # -----------------------------------------------------------------
+    # Public: ensure R package is available
+    # -----------------------------------------------------------------
+    def ensure_r_package(self, pkg_name: str):
+        r = self.robjects.r
         try:
-            r_func = robjects.globalenv[function_name]
-            with localconverter(robjects.default_converter + pandas2ri.converter):
-                r_args = [robjects.conversion.py2rpy(arg) for arg in args]
-                r_kwargs = {k: robjects.conversion.py2rpy(v) for k, v in kwargs.items()}
-                result = r_func(*r_args, **r_kwargs)
+            r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
+        except Exception:
+            r(f'install.packages("{pkg_name}", repos="https://cloud.r-project.org")')
+            r(f'suppressMessages(library("{pkg_name}", character.only=TRUE))')
-            # Step 1: Try direct conversion
-            with localconverter(robjects.default_converter + pandas2ri.converter):
-                py_result = robjects.conversion.rpy2py(result)
+    # -----------------------------------------------------------------
+    # Public: call an R function
+    # -----------------------------------------------------------------
+    def call(self, func_name: str, *args, **kwargs):
+        """
+        Call an R function safely. Supports:
+        - functions defined in scripts
+        - base R functions
+        - functions in loaded packages
+        """
+        self._ensure_r_loaded()
-            # Step 2: If it's still an R container, convert it
-            if isinstance(py_result, (NamedList, ListVector)):
-                py_result = r_namedlist_to_dict(py_result)
+        # --- Find the function ---
+        try:
+            func = self.robjects.globalenv[func_name]  # script-defined
+        except KeyError:
+            try:
+                func = self.robjects.r[func_name]  # base or package function
+            except KeyError:
+                raise ValueError(f"R function '{func_name}' not found.")
-            # Step 3: Recursively process any nested frames
-            return replace_r_na(_recursive_postprocess(py_result))
+        # --- Convert Python args to R ---
+        r_args = [self._py2r(a) for a in args]
+        r_kwargs = {k: self._py2r(v) for k, v in kwargs.items()}
-        except KeyError:
-            raise ValueError(f"Function '{function_name}' not found in the R script.")
+        # --- Call safely ---
+        try:
+            result = func(*r_args, **r_kwargs)
         except Exception as e:
-            raise RuntimeError(f"Error calling R function '{function_name}': {e}")
-    @classmethod
-    def from_github(
-        cls,
-        repo: str,
-        file_path: str,
-        ref: str = "main",
-        token: Optional[str] = None,
-        cache_dir: Optional[Path] = None,
-        path_to_renv: Optional[Path] = None,
-        trust_remote_code: bool = False,
-        require_token: bool = False,
-    ) -> "RFunctionCaller | Path":
-        """
-        Download an R script from a GitHub repository and construct an RFunctionCaller.
-        Args:
-            repo: repository in the form "owner/repo".
-            file_path: path to the R script inside the repo (e.g. "scripts/my.R").
-            ref: branch name, tag or commit SHA. Defaults to "main".
-            token: optional GitHub token for private repos. If None, looks at
-                environment variables `GITHUB_TOKEN` or `GH_TOKEN`.
-            cache_dir: optional directory to cache downloaded files. Defaults to
-                `~/.cache/rpy-bridge`.
-            path_to_renv: optional path to renv or project directory to use.
-            trust_remote_code: MUST be True to execute remote code. If False,
-                the function will only return the local cached path.
-        Returns:
-            If `trust_remote_code` is True, returns an `RFunctionCaller` instance
-            ready to call functions from the downloaded script. Otherwise returns
-            the `Path` to the cached script so the caller can inspect it first.
-        """
-        raise NotImplementedError(
-            "RFunctionCaller.from_github was removed. Clone repositories locally and pass a local script_path to RFunctionCaller instead."
-        )
+            raise RuntimeError(f"Error calling R function '{func_name}': {e}")
+        # --- Convert R result back to Python ---
+        return self._r2py(result)
 # %%
-def r_namedlist_to_dict(namedlist: object) -> object:
+# ------------------------------
+# Utility functions for R ↔ Python
+# ------------------------------
+def r_namedlist_to_dict(namedlist, caller: RFunctionCaller, top_level=False):
     """
     Recursively convert an R NamedList or ListVector to a Python dictionary.
-    - Unwrap atomic R vectors (StrVector, IntVector, etc.) into Python lists or dicts if named.
-    - Convert data.frames to pandas DataFrames.
-    - Handles NULL or unnamed cases gracefully.
+    Uses the caller._r2py method for nested conversions.
     """
+    r = _ensure_rpy2()
+    NamedList = r["NamedList"]
+    ListVector = r["ListVector"]
-    # -------------------------------------------
-    # Handle named lists (NamedList or ListVector)
-    # -------------------------------------------
     if isinstance(namedlist, (NamedList, ListVector)):
         names = namedlist.names if not callable(namedlist.names) else namedlist.names()
+        # Detect positional (unnamed) list
+        if names and all(str(i) == str(name) for i, name in enumerate(names)):
+            out = []
+            for v in namedlist:
+                # Nested elements are never top-level
+                val = caller._r2py(v, top_level=False)
+                out.append(val)
+            return out
+        # Otherwise dict
         result = {}
+        for i, val in enumerate(namedlist):
+            key = names[i] if names and i < len(names) else str(i)
+            v_py = caller._r2py(val, top_level=False)  # nested elements
+            result[str(key)] = v_py
+        return result
-        # Only iterate if names is not NULL
-        if not isinstance(names, NULLType):
-            for key, value in zip(names, namedlist):
-                key_str = str(key) if key is not None and not isinstance(key, NULLType) else None
-                if key_str:
-                    result[key_str] = r_namedlist_to_dict(value)
-            return result
-        # If no names, fallback to a list
-        return [r_namedlist_to_dict(value) for value in namedlist]
-    # -------------------------------------------
-    # Handle atomic vectors (StrVector, IntVector, etc.)
-    # These may have names (e.g., c(a = 1, b = 2)) — if so, return a dict.
-    # Otherwise, convert to plain Python list.
-    # -------------------------------------------
-    if isinstance(namedlist, (StrVector, IntVector, FloatVector, BoolVector)):
-        names = namedlist.names if not callable(namedlist.names) else namedlist.names()
-        if not isinstance(names, NULLType):
-            return {
-                str(n): v
-                for n, v in zip(names, list(namedlist))
-                if n is not None and not isinstance(n, NULLType)
-            }
-        return list(namedlist)
-    # -------------------------------------------
-    # Attempt conversion via pandas2ri — works for data.frames, tibbles, etc.
-    # If it fails, fall back to returning the original R object.
-    # -------------------------------------------
-    with localconverter(robjects.default_converter + pandas2ri.converter):
-        try:
-            return robjects.conversion.rpy2py(namedlist)
-        except Exception:
-            return namedlist
+    # Fallback: scalar/vector at the very top
+    return caller._r2py(namedlist, top_level=top_level)
-# %%
-def clean_r_dataframe(r_df: object) -> object:
+def clean_r_dataframe(r_df: pd.DataFrame) -> pd.DataFrame:
     """
-    Clean an R data.frame object by removing common non-structural attributes
-    like .groups and .rows.
+    Clean an R data.frame by removing non-structural attributes like .groups and .rows.
     """
     for attr in [".groups", ".rows"]:
         try:
-            del r_df.attr[attr]
+            del r_df.attrs[attr]
         except (KeyError, AttributeError):
             pass
     return r_df
-# %%
 def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
-    # Replace common string versions of NA/NaN with actual pd.NA
-    return df.replace(["nan", "NaN", "NA", "na", ""], pd.NA)
-# %%
-def replace_r_na(obj: object) -> object:
     """
-    Recursively replace R NA_Character with np.nan in any structure.
+    Replace string NAs or empty strings with pd.NA.
     """
-    # Handle DataFrame
-    if isinstance(obj, pd.DataFrame):
-        return (
-            obj.replace({ro.NA_Character: np.nan}, regex=False)
-            if hasattr(ro, "NA_Character")
-            else obj
-        )
-    elif isinstance(obj, dict):
-        return {k: replace_r_na(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [replace_r_na(item) for item in obj]
-    elif hasattr(ro, "NA_Character") and obj is ro.NA_Character:
-        return np.nan
-    else:
-        return obj
+    return df.replace(["nan", "NaN", "NA", "na", ""], pd.NA)
-# %%
 def normalize_single_df_dtypes(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Normalize dtypes in a single DataFrame after R conversion.
+    """
     df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)
     for col in df.columns:
         series = df[col]
-        # Try converting object/string columns to numeric if possible
         if pd.api.types.is_object_dtype(series):
             coerced = pd.to_numeric(series, errors="coerce")
-            # Replace column if conversion produced fewer NaNs (meaning more numeric)
             if coerced.notna().sum() >= series.notna().sum() * 0.5:
                 df[col] = coerced
-        # Cast integer columns with NA to float to accommodate pd.NA
-        if pd.api.types.is_integer_dtype(df[col]):
-            if df[col].isna().any():
-                df[col] = df[col].astype("float64")
+        if pd.api.types.is_integer_dtype(df[col]) and df[col].isna().any():
+            df[col] = df[col].astype("float64")
     return df
-# %%
 def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
     """
-    Post-process a DataFrame converted from R via rpy2:
-    - Converts numeric columns that represent R dates into datetime
-    - Converts timezone-aware datetimes to naive datetimes
-    - Replaces R's NA_integer_ sentinel (-2147483648) with pd.NA
+    Post-process R DataFrame:
+    - Convert R NA_integer_ sentinel (-2147483648) to pd.NA
+    - Convert R-style numeric dates to datetime
+    - Remove timezone from datetime columns
     """
     for col in df.columns:
         series = df[col]
-        # Fix R's NA_integer_ sentinel (-2147483648)
         if pd.api.types.is_integer_dtype(series):
-            if (series == -2147483648).any():
-                df[col] = series.mask(series == -2147483648, pd.NA)
+            df[col] = series.mask(series == -2147483648, pd.NA)
-        # Convert R-style date columns (days since 1970) to datetime
         if pd.api.types.is_numeric_dtype(series):
             values = series.dropna()
             if not values.empty and values.between(10000, 40000).all():
                 try:
-                    # "1970-01-01" is the reference date for Unix Epoch
-                    df[col] = pd.to_datetime("1970-01-01") + pd.to_timedelta(series, unit="D")
+                    df[col] = pd.to_datetime("1970-01-01") + pd.to_timedelta(
+                        series, unit="D"
+                    )
                 except Exception:
                     pass
-        # Remove timezone from datetime columns (e.g., POSIXct with tz)
         if pd.api.types.is_datetime64tz_dtype(series):
             df[col] = series.dt.tz_localize(None)
     return df
-# %%
 def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Apply a series of fixes to a DataFrame converted from R:
+    - Type corrections
+    - String NA normalization
+    - Index normalization
+    """
     df = fix_r_dataframe_types(df)
     df = fix_string_nans(df)
     df = normalize_single_df_dtypes(df)
-    # Normalize R-style string index starting from "1"
     if df.index.dtype == object:
         try:
             int_index = df.index.astype(int)
-            if (int_index == (np.arange(len(df)) + 1)).all():
+            if (int_index == np.arange(len(df)) + 1).all():
                 df.index = pd.RangeIndex(start=0, stop=len(df))
         except Exception:
-            pass  # leave index as-is if not convertible
+            pass
     return df
-# Note: GitHub fetch helpers were removed to keep the API focused on
-# local script invocation. If you need to run remote scripts, clone the
-# repository locally and pass the local `script_path` to `RFunctionCaller`.
+def clean_r_missing(obj, caller: RFunctionCaller):
+    """
+    Post-process R return objects for downstream Python use.
+    Recursively convert R-style missing values to pandas/NumPy:
+    - NA_integer_, NA_real_, NA_logical_ → np.nan
+    - NA_character_ → pd.NA
+    """
+    r = _ensure_rpy2()
+    ro = r["robjects"]
+    NA_MAP = {
+        getattr(ro, "NA_Real", None): np.nan,
+        getattr(ro, "NA_Integer", None): np.nan,
+        getattr(ro, "NA_Logical", None): np.nan,
+        getattr(ro, "NA_Character", None): pd.NA,
+    }
+    if isinstance(obj, pd.DataFrame):
+        for col in obj.columns:
+            obj[col] = obj[col].apply(lambda x: clean_r_missing(x, caller))
+        return obj
+    elif isinstance(obj, dict):
+        return {k: clean_r_missing(v, caller) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_r_missing(v, caller) for v in obj]
+    else:
+        return NA_MAP.get(obj, obj)
 # %%
@@ -404,7 +654,9 @@ def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 # -------------------------------------------
-def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
+def normalize_dtypes(
+    df1: pd.DataFrame, df2: pd.DataFrame
+) -> tuple[pd.DataFrame, pd.DataFrame]:
     """
     Aligns column dtypes across two DataFrames for accurate comparison.
     - Replaces empty strings with pd.NA.
@@ -420,8 +672,12 @@ def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame
         dtype1, dtype2 = s1.dtype, s2.dtype
         # If one is numeric and the other is object, try coercing both to numeric
-        if (pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_object_dtype(dtype2)) or (
-            pd.api.types.is_object_dtype(dtype1) and pd.api.types.is_numeric_dtype(dtype2)
+        if (
+            pd.api.types.is_numeric_dtype(dtype1)
+            and pd.api.types.is_object_dtype(dtype2)
+        ) or (
+            pd.api.types.is_object_dtype(dtype1)
+            and pd.api.types.is_numeric_dtype(dtype2)
         ):
             try:
                 df1[col] = pd.to_numeric(s1, errors="coerce")
@@ -431,7 +687,9 @@ def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame
                 pass  # fallback to next block if coercion fails
         # If both are numeric but of different types (e.g., int vs float), unify to float64
-        if pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_numeric_dtype(dtype2):
+        if pd.api.types.is_numeric_dtype(dtype1) and pd.api.types.is_numeric_dtype(
+            dtype2
+        ):
             df1[col] = df1[col].astype("float64")
             df2[col] = df2[col].astype("float64")
             continue
@@ -445,7 +703,9 @@ def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame
 # %%
-def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
+def align_numeric_dtypes(
+    df1: pd.DataFrame, df2: pd.DataFrame
+) -> tuple[pd.DataFrame, pd.DataFrame]:
     """
     Ensure aligned numeric dtypes between two DataFrames for accurate comparison.
     Converts between int, float, and numeric-looking strings where appropriate.
@@ -479,7 +739,9 @@ def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataF
 # %%
-def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8) -> dict:
+def compare_r_py_dataframes(
+    df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8
+) -> dict:
     """
     Compare a Python DataFrame (df1) with an R DataFrame converted to pandas (df2).
@@ -530,7 +792,9 @@ def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: flo
         col_py = df1_aligned[col]
         col_r = df2_aligned[col]
-        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
+        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(
+            col_r
+        ):
             col_py, col_r = col_py.align(col_r)
             close = np.isclose(

rpy-bridge 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

rpy-bridge 0.1.0py3-none-any.whl → 0.3.0py3-none-any.whl