rpy-bridge 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rpy_bridge/__init__.py CHANGED
@@ -1,38 +1,14 @@
1
1
  """
2
2
  Public API for the rpy-bridge package.
3
3
 
4
- This module re-exports the helpers that wrap rpy2 so downstream users can
5
- continue importing directly from ``rpy_bridge``.
4
+ `RFunctionCaller` is the primary entry point for loading R scripts and calling
5
+ functions. Other helpers are re-exported for compatibility.
6
6
  """
7
7
 
8
- from .rpy2_utils import (
9
- NamespaceWrapper,
10
- RFunctionCaller,
11
- activate_renv,
12
- align_numeric_dtypes,
13
- clean_r_dataframe,
14
- clean_r_missing,
15
- compare_r_py_dataframes,
16
- fix_r_dataframe_types,
17
- fix_string_nans,
18
- normalize_dtypes,
19
- normalize_single_df_dtypes,
20
- postprocess_r_dataframe,
21
- r_namedlist_to_dict,
22
- )
8
+ from .core import RFunctionCaller
9
+ from .renv import activate_renv
23
10
 
24
11
  __all__ = [
25
12
  "activate_renv",
26
13
  "RFunctionCaller",
27
- "NamespaceWrapper",
28
- "r_namedlist_to_dict",
29
- "clean_r_dataframe",
30
- "fix_string_nans",
31
- "clean_r_missing",
32
- "normalize_single_df_dtypes",
33
- "fix_r_dataframe_types",
34
- "postprocess_r_dataframe",
35
- "normalize_dtypes",
36
- "align_numeric_dtypes",
37
- "compare_r_py_dataframes",
38
14
  ]
rpy_bridge/compare.py ADDED
@@ -0,0 +1,106 @@
1
+ """
2
+ DataFrame comparison helpers used to validate parity between R and Python outputs.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from .dataframe import fix_r_dataframe_types, fix_string_nans
13
+
14
+
15
def _stringify_keep_missing(values: pd.Series) -> pd.Series:
    """Cast *values* to ``str`` but leave missing entries as ``pd.NA``."""
    # ``astype(str)`` alone would turn NaN / None / pd.NA into the distinct
    # strings "nan" / "None" / "<NA>", so two missing cells could compare
    # unequal and ``isna()`` would no longer recognise them.
    return values.astype(str).where(values.notna(), pd.NA)


def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Bring the columns shared by *df1* and *df2* to comparable dtypes.

    For every column present in both frames:

    * empty strings are replaced by ``pd.NA``;
    * if one side is numeric and the other is ``object``, both sides are
      passed through ``pd.to_numeric(errors="coerce")``;
    * if both sides are numeric, both are cast to ``float64``;
    * otherwise, if either side is ``object``, both sides are cast to ``str``
      with missing values kept missing.

    Both frames are modified in place and also returned; pass copies when the
    inputs must stay untouched.

    Returns:
        The ``(df1, df2)`` pair after normalisation.
    """
    for col in df1.columns.intersection(df2.columns):
        df1[col] = df1[col].replace("", pd.NA)
        df2[col] = df2[col].replace("", pd.NA)
        s1, s2 = df1[col], df2[col]
        numeric1 = pd.api.types.is_numeric_dtype(s1.dtype)
        numeric2 = pd.api.types.is_numeric_dtype(s2.dtype)
        object1 = pd.api.types.is_object_dtype(s1.dtype)
        object2 = pd.api.types.is_object_dtype(s2.dtype)

        if (numeric1 and object2) or (object1 and numeric2):
            try:
                converted1 = pd.to_numeric(s1, errors="coerce")
                converted2 = pd.to_numeric(s2, errors="coerce")
            except (TypeError, ValueError, OverflowError):
                # Best effort: fall through to the string branch below.
                pass
            else:
                # Assign only once both conversions succeeded so the two
                # frames never end up half-converted.
                df1[col], df2[col] = converted1, converted2
                continue

        if numeric1 and numeric2:
            df1[col] = s1.astype("float64")
            df2[col] = s2.astype("float64")
            continue

        if object1 or object2:
            df1[col] = _stringify_keep_missing(s1)
            df2[col] = _stringify_keep_missing(s2)
    return df1, df2
38
+
39
+
40
# String forms that ``Series.astype(str)`` produces for missing values. They
# are accepted as "already missing" so that columns stringified earlier in the
# pipeline can still be recognised as numeric.
_STRINGIFIED_MISSING = frozenset({"nan", "None", "<NA>", "NaT"})


def _to_float_if_lossless(values: pd.Series) -> "pd.Series | None":
    """Return *values* as ``float64``, or ``None`` if that would drop data."""
    numeric = pd.to_numeric(values, errors="coerce")
    already_missing = values.isna() | values.astype(str).isin(_STRINGIFIED_MISSING)
    # A NaN that was not missing beforehand means real content (e.g. "abc")
    # failed to parse; converting would silently erase it.
    if (numeric.isna() & ~already_missing).any():
        return None
    return numeric.astype("float64")


def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Cast shared columns to ``float64`` when both sides are really numeric.

    For every column present in both frames, empty strings are replaced by
    ``pd.NA``. The column is then converted to ``float64`` in both frames
    only if

    * every non-missing value on both sides parses as a number, and
    * at least one side contains a numeric value.

    Otherwise both sides keep their values (with ``""`` replaced by
    ``pd.NA``). Requiring a lossless parse prevents differing text values
    from being collapsed to NaN and then treated as equal.

    Both frames are modified in place and also returned.

    Returns:
        The ``(df1, df2)`` pair after alignment.
    """
    for col in df1.columns.intersection(df2.columns):
        s1, s2 = df1[col].replace("", pd.NA), df2[col].replace("", pd.NA)
        try:
            f1 = _to_float_if_lossless(s1)
            f2 = _to_float_if_lossless(s2)
        except (TypeError, ValueError, OverflowError):
            # Best effort: leave columns that cannot be inspected untouched.
            f1 = f2 = None

        convertible = f1 is not None and f2 is not None
        if convertible and not (f1.isna().all() and f2.isna().all()):
            df1[col], df2[col] = f1, f2
            continue
        df1[col], df2[col] = s1, s2
    return df1, df2
54
+
55
+
56
def compare_r_py_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, float_tol: float = 1e-8) -> dict:
    """Compare two DataFrames and report where they differ.

    ``df2`` is first passed through ``fix_r_dataframe_types``; both frames
    then go through ``fix_string_nans``, ``normalize_dtypes`` and
    ``align_numeric_dtypes`` (on copies, so the caller's frames are not
    touched by the last two steps). Local names suggest ``df1`` is the Python
    result and ``df2`` the R result -- confirm against callers.

    Args:
        df1: First frame.
        df2: Second frame.
        float_tol: Absolute tolerance handed to ``np.isclose`` as ``atol``.
            NumPy's default relative tolerance still applies.

    Returns:
        A dict with the keys

        * ``"shape_mismatch"`` / ``"columns_mismatch"`` / ``"index_mismatch"``:
          booleans, ``True`` when the respective property differs;
        * ``"numeric_diffs"``: column name -> DataFrame with the ``df1`` and
          ``df2`` values that are not close;
        * ``"non_numeric_diffs"``: column name -> DataFrame with the ``df1``
          and ``df2`` values that are unequal and not both missing.

    Mismatch warnings are written to stdout.
    """
    results: dict[str, Any] = {
        "shape_mismatch": False,
        "columns_mismatch": False,
        "index_mismatch": False,
        "numeric_diffs": {},
        "non_numeric_diffs": {},
    }
    df2 = fix_r_dataframe_types(df2)
    df1 = fix_string_nans(df1)
    df2 = fix_string_nans(df2)
    df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
    df1, df2 = align_numeric_dtypes(df1, df2)

    if df1.shape != df2.shape:
        results["shape_mismatch"] = True
        print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")

    # Previously this flag was initialised but never updated.
    if not df1.index.equals(df2.index):
        results["index_mismatch"] = True
        print("[Warning] Index mismatch between df1 and df2")

    if set(df1.columns) != set(df2.columns):
        results["columns_mismatch"] = True
        print("[Warning] Column mismatch:")
        print(f" df1: {df1.columns}")
        print(f" df2: {df2.columns}")
        common_cols = df1.columns.intersection(df2.columns)
    else:
        common_cols = df1.columns

    df1_aligned, df2_aligned = df1.loc[:, common_cols], df2.loc[:, common_cols]
    for col in common_cols:
        col_py, col_r = df1_aligned[col], df2_aligned[col]
        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
            # Align on the index so rows are compared label by label.
            col_py, col_r = col_py.align(col_r)
            close = np.isclose(
                col_py.fillna(np.nan),
                col_r.fillna(np.nan),
                atol=float_tol,
                equal_nan=True,
            )
            if not close.all():
                results["numeric_diffs"][col] = pd.DataFrame(
                    {"df1": col_py[~close], "df2": col_r[~close]}
                )
        else:
            unequal = ~col_py.eq(col_r)
            # Two missing cells count as equal.
            both_na = col_py.isna() & col_r.isna()
            unequal = unequal & ~both_na
            if unequal.any():
                results["non_numeric_diffs"][col] = pd.DataFrame(
                    {"df1": col_py[unequal], "df2": col_r[unequal]}
                )
    return results
104
+
105
+
106
+ __all__ = ["normalize_dtypes", "align_numeric_dtypes", "compare_r_py_dataframes"]
rpy_bridge/convert.py ADDED
@@ -0,0 +1,63 @@
1
+ """
2
+ Conversion helpers for R ↔ Python interop.
3
+
4
+ These utilities are used by RFunctionCaller and exposed for compatibility.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from .rpy2_loader import ensure_rpy2
15
+
16
+ if TYPE_CHECKING: # pragma: no cover - typing only
17
+ from .core import RFunctionCaller
18
+
19
+
20
def r_namedlist_to_dict(namedlist, caller: "RFunctionCaller", top_level: bool = False):
    """Convert an R list-like object into a Python ``dict`` or ``list``.

    Args:
        namedlist: Object to convert. Only instances of the ``NamedList`` /
            ``ListVector`` types provided by ``ensure_rpy2()`` are unpacked
            here; anything else is delegated to ``caller._r2py``.
        caller: Provides ``_r2py`` for converting the contained values.
        top_level: Forwarded to ``caller._r2py`` for non-list inputs only.

    Returns:
        * a ``list`` when the element names are exactly ``"0", "1", ...``;
        * otherwise a ``dict`` keyed by the stringified element names, where
          elements without a usable name (``None`` or ``""``) are keyed by
          their position so that several unnamed elements do not overwrite
          each other;
        * for non-list inputs, whatever ``caller._r2py`` returns.
    """
    r = ensure_rpy2()
    list_types = (r["NamedList"], r["ListVector"])
    if not isinstance(namedlist, list_types):
        return caller._r2py(namedlist, top_level=top_level)

    # ``names`` is an attribute on some list types and a method on others.
    names = namedlist.names() if callable(namedlist.names) else namedlist.names

    # Names that merely count 0, 1, 2, ... are treated as a plain sequence.
    if names and all(str(i) == str(name) for i, name in enumerate(names)):
        return [caller._r2py(val, top_level=False) for val in namedlist]

    result = {}
    for i, val in enumerate(namedlist):
        name = names[i] if names and i < len(names) else None
        # Unnamed elements of a partially named list would otherwise all
        # collapse onto the same key and lose data.
        key = str(i) if name is None or str(name) == "" else str(name)
        result[key] = caller._r2py(val, top_level=False)
    return result
41
+
42
+
43
def clean_r_missing(obj, caller: "RFunctionCaller"):
    """Replace R missing-value sentinels inside *obj* with pandas/NumPy ones.

    The sentinels are read from ``caller.robjects``: ``NA_Real``,
    ``NA_Integer`` and ``NA_Logical`` map to ``np.nan``; ``NA_Character``
    maps to ``pd.NA``. Attributes that ``robjects`` does not provide are
    skipped, so Python ``None`` is never mistaken for a sentinel.

    Args:
        obj: A DataFrame (cleaned cell by cell, in place), a ``dict`` or
            ``list`` (rebuilt recursively), or any other value.
        caller: Object exposing the ``robjects`` namespace.

    Returns:
        The cleaned object. DataFrames are returned as the same instance;
        dicts and lists are new containers; any other value is returned
        unchanged unless it is one of the sentinels.
    """
    robjects = caller.robjects

    # Build the lookup once instead of once per recursive call / cell.
    na_map = {}
    for attr, replacement in (
        ("NA_Real", np.nan),
        ("NA_Integer", np.nan),
        ("NA_Logical", np.nan),
        ("NA_Character", pd.NA),
    ):
        sentinel = getattr(robjects, attr, None)
        if sentinel is not None:
            na_map[sentinel] = replacement

    def _clean(value):
        if isinstance(value, pd.DataFrame):
            for col in value.columns:
                value[col] = value[col].apply(_clean)
            return value
        if isinstance(value, dict):
            return {k: _clean(v) for k, v in value.items()}
        if isinstance(value, list):
            return [_clean(v) for v in value]
        try:
            return na_map.get(value, value)
        except TypeError:
            # Unhashable leaves (e.g. numpy arrays) cannot be a sentinel.
            return value

    return _clean(obj)
61
+
62
+
63
+ __all__ = ["r_namedlist_to_dict", "clean_r_missing"]