rpy-bridge 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rpy_bridge/__init__.py +4 -28
- rpy_bridge/compare.py +106 -0
- rpy_bridge/convert.py +63 -0
- rpy_bridge/core.py +505 -0
- rpy_bridge/dataframe.py +74 -0
- rpy_bridge/env.py +108 -0
- rpy_bridge/logging.py +50 -0
- rpy_bridge/renv.py +149 -0
- rpy_bridge/rpy2_loader.py +71 -0
- rpy_bridge-0.5.0.dist-info/METADATA +297 -0
- rpy_bridge-0.5.0.dist-info/RECORD +15 -0
- rpy_bridge/rpy2_utils.py +0 -1221
- rpy_bridge-0.4.0.dist-info/METADATA +0 -258
- rpy_bridge-0.4.0.dist-info/RECORD +0 -8
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/WHEEL +0 -0
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {rpy_bridge-0.4.0.dist-info → rpy_bridge-0.5.0.dist-info}/top_level.txt +0 -0
rpy_bridge/__init__.py
CHANGED
|
@@ -1,38 +1,14 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Public API for the rpy-bridge package.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
`RFunctionCaller` is the primary entry point for loading R scripts and calling
|
|
5
|
+
functions. Other helpers are re-exported for compatibility.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from .
|
|
9
|
-
|
|
10
|
-
RFunctionCaller,
|
|
11
|
-
activate_renv,
|
|
12
|
-
align_numeric_dtypes,
|
|
13
|
-
clean_r_dataframe,
|
|
14
|
-
clean_r_missing,
|
|
15
|
-
compare_r_py_dataframes,
|
|
16
|
-
fix_r_dataframe_types,
|
|
17
|
-
fix_string_nans,
|
|
18
|
-
normalize_dtypes,
|
|
19
|
-
normalize_single_df_dtypes,
|
|
20
|
-
postprocess_r_dataframe,
|
|
21
|
-
r_namedlist_to_dict,
|
|
22
|
-
)
|
|
8
|
+
from .core import RFunctionCaller
|
|
9
|
+
from .renv import activate_renv
|
|
23
10
|
|
|
24
11
|
__all__ = [
|
|
25
12
|
"activate_renv",
|
|
26
13
|
"RFunctionCaller",
|
|
27
|
-
"NamespaceWrapper",
|
|
28
|
-
"r_namedlist_to_dict",
|
|
29
|
-
"clean_r_dataframe",
|
|
30
|
-
"fix_string_nans",
|
|
31
|
-
"clean_r_missing",
|
|
32
|
-
"normalize_single_df_dtypes",
|
|
33
|
-
"fix_r_dataframe_types",
|
|
34
|
-
"postprocess_r_dataframe",
|
|
35
|
-
"normalize_dtypes",
|
|
36
|
-
"align_numeric_dtypes",
|
|
37
|
-
"compare_r_py_dataframes",
|
|
38
14
|
]
|
rpy_bridge/compare.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DataFrame comparison helpers used to validate parity between R and Python outputs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from .dataframe import fix_r_dataframe_types, fix_string_nans
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Bring the columns shared by two frames to comparable dtypes.

    For every column present in both frames:

    * empty strings are replaced with ``pd.NA``;
    * a numeric/object pair is coerced with ``pd.to_numeric(errors="coerce")``;
    * a numeric/numeric pair is cast to ``float64``;
    * any other pair involving an object column is converted to strings,
      with missing values kept missing.

    Both frames are modified in place and also returned; pass copies if the
    originals must stay untouched.

    Args:
        df1: First frame.
        df2: Second frame.

    Returns:
        The same two frame objects, ``(df1, df2)``.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_object = pd.api.types.is_object_dtype

    for col in df1.columns.intersection(df2.columns):
        left = df1[col].replace("", pd.NA)
        right = df2[col].replace("", pd.NA)
        df1[col], df2[col] = left, right

        left_numeric, right_numeric = is_numeric(left.dtype), is_numeric(right.dtype)
        left_object, right_object = is_object(left.dtype), is_object(right.dtype)

        if (left_numeric and right_object) or (left_object and right_numeric):
            # Coerce both sides before assigning either, so a failure cannot
            # leave the two frames in different states.
            try:
                left_coerced = pd.to_numeric(left, errors="coerce")
                right_coerced = pd.to_numeric(right, errors="coerce")
            except (TypeError, ValueError):
                pass  # not coercible: fall through to the string comparison
            else:
                df1[col], df2[col] = left_coerced, right_coerced
                continue

        if left_numeric and right_numeric:
            df1[col] = left.astype("float64")
            df2[col] = right.astype("float64")
            continue

        if left_object or right_object:
            # astype(str) would turn missing values into the literal strings
            # "nan"/"None"/"<NA>"; mask them back so they still count as
            # missing and None vs NaN is not reported as a difference.
            df1[col] = left.astype(str).mask(left.isna(), pd.NA)
            df2[col] = right.astype(str).mask(right.isna(), pd.NA)

    return df1, df2
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Cast shared columns to ``float64`` when both sides are genuinely numeric.

    For every column present in both frames, empty strings become ``pd.NA``.
    The pair is then converted to ``float64`` only if

    * every non-missing value on *both* sides parses as a number, and
    * at least one side actually contains a number.

    Otherwise the columns are kept as they are (with empty strings replaced).
    Converting a column that holds real text would turn that text into NaN and
    make different values look identical to a later comparison.

    Both frames are modified in place and also returned.

    Args:
        df1: First frame.
        df2: Second frame.

    Returns:
        The same two frame objects, ``(df1, df2)``.
    """
    # String forms that ``astype(str)`` gives to missing values; they are
    # treated as missing rather than as text that blocks the conversion.
    missing_tokens = ["nan", "None", "<NA>", "NaT"]

    def _as_float(series):
        """Return *series* as float64, or None if that would drop real values."""
        try:
            numeric = pd.to_numeric(series, errors="coerce")
            placeholder = series.isna() | series.isin(missing_tokens)
            if (numeric.isna() & ~placeholder).any():
                return None
            return numeric.astype("float64")
        except (TypeError, ValueError):
            return None

    for col in df1.columns.intersection(df2.columns):
        left = df1[col].replace("", pd.NA)
        right = df2[col].replace("", pd.NA)
        left_num = _as_float(left)
        right_num = _as_float(right)

        convertible = left_num is not None and right_num is not None
        if convertible and not (left_num.isna().all() and right_num.isna().all()):
            df1[col], df2[col] = left_num, right_num
        else:
            df1[col], df2[col] = left, right

    return df1, df2
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def compare_r_py_dataframes(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    float_tol: float = 1e-8,
    rel_tol: float = 1e-5,
) -> dict:
    """Compare two DataFrames column by column and report the differences.

    ``df2`` is first passed through ``fix_r_dataframe_types``; both frames then
    go through ``fix_string_nans``, ``normalize_dtypes`` and
    ``align_numeric_dtypes`` before being compared on their common columns.

    Args:
        df1: First frame (handled as the Python-side result).
        df2: Second frame (handled as the R-side result).
        float_tol: Absolute tolerance for numeric columns (``atol`` of
            ``numpy.isclose``).
        rel_tol: Relative tolerance for numeric columns (``rtol`` of
            ``numpy.isclose``). The default equals NumPy's own default, which
            was previously applied implicitly.

    Returns:
        A dict with the keys ``"shape_mismatch"``, ``"columns_mismatch"`` and
        ``"index_mismatch"`` (booleans) and ``"numeric_diffs"`` /
        ``"non_numeric_diffs"`` (column name -> DataFrame with the differing
        ``df1`` and ``df2`` values).
    """
    results: dict[str, Any] = {
        "shape_mismatch": False,
        "columns_mismatch": False,
        "index_mismatch": False,
        "numeric_diffs": {},
        "non_numeric_diffs": {},
    }

    df2 = fix_r_dataframe_types(df2)
    df1 = fix_string_nans(df1)
    df2 = fix_string_nans(df2)
    # normalize_dtypes assigns columns in place, so hand it copies.
    df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
    df1, df2 = align_numeric_dtypes(df1, df2)

    if df1.shape != df2.shape:
        results["shape_mismatch"] = True
        print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")

    if not df1.index.equals(df2.index):
        # This flag is part of the result but was never populated before.
        results["index_mismatch"] = True
        print("[Warning] Index mismatch between df1 and df2")

    if set(df1.columns) != set(df2.columns):
        results["columns_mismatch"] = True
        print("[Warning] Column mismatch:")
        print(f"  df1: {df1.columns}")
        print(f"  df2: {df2.columns}")
        common_cols = df1.columns.intersection(df2.columns)
    else:
        common_cols = df1.columns

    is_numeric = pd.api.types.is_numeric_dtype
    for col in common_cols:
        both_numeric = is_numeric(df1[col]) and is_numeric(df2[col])
        # Align on the union of row labels so rows present on only one side
        # show up as a value-vs-missing difference.
        col_py, col_r = df1[col].align(df2[col])

        if both_numeric:
            close = np.isclose(
                col_py.to_numpy(dtype="float64", na_value=np.nan),
                col_r.to_numpy(dtype="float64", na_value=np.nan),
                rtol=rel_tol,
                atol=float_tol,
                equal_nan=True,
            )
            if not close.all():
                results["numeric_diffs"][col] = pd.DataFrame(
                    {"df1": col_py[~close], "df2": col_r[~close]}
                )
            continue

        # A nullable comparison yields <NA> where either side is missing;
        # count that as "not equal" so the pair is only excused by both_na.
        equal = col_py.eq(col_r).fillna(False).astype(bool)
        both_na = col_py.isna() & col_r.isna()
        unequal = ~equal & ~both_na
        if unequal.any():
            results["non_numeric_diffs"][col] = pd.DataFrame(
                {"df1": col_py[unequal], "df2": col_r[unequal]}
            )

    return results
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Names exported by ``from rpy_bridge.compare import *``.
__all__ = ["normalize_dtypes", "align_numeric_dtypes", "compare_r_py_dataframes"]
|
rpy_bridge/convert.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Conversion helpers for R ↔ Python interop.
|
|
3
|
+
|
|
4
|
+
These utilities are used by RFunctionCaller and exposed for compatibility.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from .rpy2_loader import ensure_rpy2
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
17
|
+
from .core import RFunctionCaller
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def r_namedlist_to_dict(namedlist, caller: "RFunctionCaller", top_level: bool = False):
    """Convert an R list-like object into a Python ``list`` or ``dict``.

    Args:
        namedlist: Object to convert. Only instances of the ``NamedList`` /
            ``ListVector`` types supplied by ``ensure_rpy2()`` are unpacked
            here; anything else is handed straight to ``caller._r2py``.
        caller: Object whose ``_r2py`` method converts the individual values.
        top_level: Forwarded to ``caller._r2py`` when ``namedlist`` is not one
            of the list types. Elements of a list are always converted with
            ``top_level=False``.

    Returns:
        * a ``list`` when the names are exactly ``"0", "1", ...`` in order;
        * otherwise a ``dict`` keyed by element name, where an element with no
          name or an empty name is keyed by its position (as a string);
        * for non-list input, whatever ``caller._r2py`` returns.
    """
    r = ensure_rpy2()
    list_types = (r["NamedList"], r["ListVector"])

    if not isinstance(namedlist, list_types):
        return caller._r2py(namedlist, top_level=top_level)

    names = namedlist.names() if callable(namedlist.names) else namedlist.names

    # Names that merely count 0..n-1 carry no information: return a plain list.
    if names and all(str(i) == str(name) for i, name in enumerate(names)):
        return [caller._r2py(val, top_level=False) for val in namedlist]

    result = {}
    for i, val in enumerate(namedlist):
        name = str(names[i]) if names and i < len(names) else ""
        # Unnamed elements of a partially named R list all have the name "";
        # key them by position so they do not overwrite one another.
        result[name or str(i)] = caller._r2py(val, top_level=False)
    return result
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def clean_r_missing(obj, caller: "RFunctionCaller"):
    """Replace R missing-value sentinels inside *obj* with pandas/NumPy ones.

    The sentinels are read from ``caller.robjects``: ``NA_Real``,
    ``NA_Integer`` and ``NA_Logical`` become ``np.nan``; ``NA_Character``
    becomes ``pd.NA``. A sentinel that ``caller.robjects`` does not define is
    skipped.

    Args:
        obj: A DataFrame, dict, list or single value. Dicts and lists are
            walked recursively and rebuilt; a DataFrame is cleaned cell by
            cell **in place**.
        caller: Object exposing the ``robjects`` attribute described above.

    Returns:
        The cleaned object: the same DataFrame instance, a new dict/list, or
        the replacement (or original) value for anything else. Values that
        cannot be hashed are returned unchanged.
    """
    robjects = caller.robjects
    replacements = (
        ("NA_Real", np.nan),
        ("NA_Integer", np.nan),
        ("NA_Logical", np.nan),
        ("NA_Character", pd.NA),
    )
    # Build the lookup once per call instead of once per visited value, and
    # leave out absent sentinels so that ``None`` never becomes a key.
    na_map = {}
    for attr, replacement in replacements:
        sentinel = getattr(robjects, attr, None)
        if sentinel is not None:
            na_map[sentinel] = replacement

    def _clean(value):
        if isinstance(value, pd.DataFrame):
            for col in value.columns:
                value[col] = value[col].apply(_clean)
            return value
        if isinstance(value, dict):
            return {key: _clean(item) for key, item in value.items()}
        if isinstance(value, list):
            return [_clean(item) for item in value]
        try:
            return na_map.get(value, value)
        except TypeError:
            # Unhashable leaf (e.g. a NumPy array): cannot be a sentinel.
            return value

    return _clean(obj)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Names exported by ``from rpy_bridge.convert import *``.
__all__ = ["r_namedlist_to_dict", "clean_r_missing"]
|