rpy-bridge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rpy_bridge/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """
2
+ Public API for the rpy-bridge package.
3
+
4
+ This module re-exports the helpers that wrap rpy2 so downstream users can
5
+ continue importing directly from ``rpy_bridge``.
6
+ """
7
+
8
+ from .rpy2_utils import (
9
+ RFunctionCaller,
10
+ activate_renv,
11
+ align_numeric_dtypes,
12
+ clean_r_dataframe,
13
+ compare_r_py_dataframes,
14
+ fix_r_dataframe_types,
15
+ fix_string_nans,
16
+ normalize_dtypes,
17
+ normalize_single_df_dtypes,
18
+ postprocess_r_dataframe,
19
+ replace_r_na,
20
+ r_namedlist_to_dict,
21
+ )
22
+
23
+ __all__ = [
24
+ "activate_renv",
25
+ "RFunctionCaller",
26
+ "r_namedlist_to_dict",
27
+ "clean_r_dataframe",
28
+ "fix_string_nans",
29
+ "replace_r_na",
30
+ "normalize_single_df_dtypes",
31
+ "fix_r_dataframe_types",
32
+ "postprocess_r_dataframe",
33
+ "normalize_dtypes",
34
+ "align_numeric_dtypes",
35
+ "compare_r_py_dataframes",
36
+ ]
rpy_bridge/py.typed ADDED
File without changes
@@ -0,0 +1,569 @@
1
+ """
2
+ Wrapper for calling R functions from Python using rpy2.
3
+
4
+ ----------
5
+ ** R must be installed and accessible in your environment **
6
+ Ensure compatibility with your R project's renv setup (or other virtual env/base env if that's what you're using).
7
+ ----------
8
+ """
9
+
10
+ # ruff: noqa: E402
11
+ # %%
12
+ # Import libraries
13
+ import os
14
+ import warnings
15
+
16
+ warnings.filterwarnings("ignore", message="Environment variable .* redefined by R")
17
+
18
+ from pathlib import Path
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import rpy2.robjects as ro
23
+ from rpy2 import robjects
24
+ from rpy2.rinterface_lib.sexp import NULLType
25
+ from rpy2.rlike.container import NamedList
26
+ from rpy2.robjects import pandas2ri
27
+ from rpy2.robjects.conversion import localconverter
28
+ from rpy2.robjects.vectors import (
29
+ BoolVector,
30
+ FloatVector,
31
+ IntVector,
32
+ ListVector,
33
+ StrVector,
34
+ )
35
+ from typing import Optional
36
+
37
# Prefer loguru when it is installed; otherwise fall back to the standard
# library. The rest of this module logs with loguru-style "{}" placeholders
# and positional arguments, which stdlib logging cannot format on its own
# (it expects %-style), so the fallback is wrapped in a small adapter.
try:
    from loguru import logger  # type: ignore
except ImportError:
    import logging

    class _BraceStyleAdapter(logging.LoggerAdapter):
        """Stdlib logger adapter that accepts ``"{}"``-style messages with positional args."""

        def log(self, level, msg, *args, **kwargs):
            # Skip formatting entirely when the record would be discarded.
            if not self.isEnabledFor(level):
                return
            if args:
                try:
                    msg = str(msg).format(*args)
                except (IndexError, KeyError, ValueError):
                    # Message contains braces that are not format fields;
                    # never let a log call raise, just append the arguments.
                    msg = " ".join([str(msg), *map(str, args)])
            msg, kwargs = self.process(msg, kwargs)
            self.logger.log(level, msg, **kwargs)

    logging.basicConfig()
    logger = _BraceStyleAdapter(logging.getLogger("rpy-bridge"), {})
44
+
45
+
46
+ # %%
47
def activate_renv(path_to_renv: Path) -> None:
    """
    Load an renv project into the embedded R session with ``renv::load()``.

    The project is loaded through ``renv::load()`` rather than by sourcing
    ``activate.R`` directly, so a new environment is never initialised by
    accident.

    Args:
        path_to_renv: Either the ``renv`` directory itself (it must be named
            ``renv`` and contain ``activate.R``) or the project directory that
            contains a ``renv/`` folder.

    Raises:
        FileNotFoundError: If ``renv/activate.R`` or the project's
            ``renv.lock`` does not exist.
        RuntimeError: If ``renv::load()`` (or the ``R.home()`` lookup that
            precedes it) fails; the original exception is chained as the cause.
    """

    path_to_renv = path_to_renv.resolve()

    # Determine if path_to_renv is the renv directory itself or its parent
    if path_to_renv.name == "renv" and (path_to_renv / "activate.R").exists():
        # Path points directly to renv directory
        renv_dir = path_to_renv
        renv_project_dir = path_to_renv.parent
    else:
        # Path points to parent directory containing renv/
        renv_dir = path_to_renv / "renv"
        renv_project_dir = path_to_renv

    renv_activate = renv_dir / "activate.R"
    renv_lock = renv_project_dir / "renv.lock"

    if not renv_activate.exists() or not renv_lock.exists():
        raise FileNotFoundError(
            f"[Error] renv environment not found or incomplete.\n"
            f"  Expected activate.R at: {renv_activate}\n"
            f"  Expected renv.lock at: {renv_lock}\n"
            f"  Provided path: {path_to_renv}"
        )

    # Optional: set R_ENVIRON_USER if .Renviron exists
    renviron_file = renv_project_dir / ".Renviron"
    if renviron_file.is_file():
        os.environ["R_ENVIRON_USER"] = str(renviron_file)
        logger.info("R_ENVIRON_USER set to: {}", renviron_file)

    # Load the renv package, installing it from CRAN on first failure
    try:
        robjects.r("library(renv)")
    except Exception:
        logger.warning("renv package not found in R. Attempting to install...")
        robjects.r('install.packages("renv", repos="https://cloud.r-project.org")')
        # Try loading again after installation
        robjects.r("library(renv)")

    # The path is embedded in a double-quoted R string literal, so backslashes
    # and double quotes must be escaped or the generated R code is malformed.
    project_literal = renv_project_dir.as_posix().replace("\\", "\\\\").replace('"', '\\"')

    # Load the renv environment using renv::load(path)
    try:
        logger.info("Using R at: {}", robjects.r("R.home()")[0])
        robjects.r(f'renv::load("{project_literal}")')
        logger.info("renv environment loaded for project: {}", renv_project_dir)
    except Exception as e:
        raise RuntimeError(f"[Error] Failed to load renv environment: {e}") from e

    logger.debug(".libPaths(): {}", robjects.r(".libPaths()"))
104
+
105
+
106
+ # %%
107
+ class RFunctionCaller:
108
+ """
109
+ A utility class to load and execute R functions from a specified R script using rpy2.
110
+ """
111
+
112
+ def __init__(self, path_to_renv: Path | None, script_path: Path):
113
+ """
114
+ Initialize the RFunctionCaller with the path to the renv environment and the R script.
115
+ Set path_to_renv to None if no renv is used.
116
+ """
117
+ if not script_path.exists():
118
+ raise FileNotFoundError(f"R script not found: {script_path}")
119
+
120
+ self.path_to_renv = path_to_renv.resolve() if path_to_renv else None
121
+
122
+ self.script_path = script_path.resolve()
123
+ self.script_dir = self.script_path.parent
124
+
125
+ self._load_script()
126
+
127
+ def _load_script(self):
128
+ """
129
+ Set the R working directory and source the R script.
130
+ """
131
+ if self.path_to_renv:
132
+ activate_renv(self.path_to_renv)
133
+ else:
134
+ logger.info("No renv path provided; using base or current environment.")
135
+
136
+ # Set the working directory to the script's directory
137
+ robjects.r(f'setwd("{self.script_dir.as_posix()}")')
138
+ robjects.r(f'source("{self.script_path.as_posix()}")')
139
+ logger.info("R script sourced: {}", self.script_path.name)
140
+
141
+ def call(self, function_name: str, *args: object, **kwargs: object) -> object:
142
+ """
143
+ Call an R function from the sourced script, and recursively convert &
144
+ post-process the result.
145
+
146
+ Handles:
147
+ - Direct data.frame
148
+ - NamedList or ListVector
149
+ - Nested lists with data.frames inside
150
+ """
151
+
152
+ def _recursive_postprocess(obj):
153
+ # Handle single DataFrame
154
+ if isinstance(obj, pd.DataFrame):
155
+ return postprocess_r_dataframe(obj)
156
+
157
+ # Handle dictionary (e.g. NamedList converted)
158
+ elif isinstance(obj, dict):
159
+ return {k: _recursive_postprocess(v) for k, v in obj.items()}
160
+
161
+ # Handle list of items
162
+ elif isinstance(obj, list):
163
+ return [_recursive_postprocess(item) for item in obj]
164
+
165
+ return obj # Primitive values stay as-is
166
+
167
+ try:
168
+ r_func = robjects.globalenv[function_name]
169
+
170
+ with localconverter(robjects.default_converter + pandas2ri.converter):
171
+ r_args = [robjects.conversion.py2rpy(arg) for arg in args]
172
+ r_kwargs = {k: robjects.conversion.py2rpy(v) for k, v in kwargs.items()}
173
+ result = r_func(*r_args, **r_kwargs)
174
+
175
+ # Step 1: Try direct conversion
176
+ with localconverter(robjects.default_converter + pandas2ri.converter):
177
+ py_result = robjects.conversion.rpy2py(result)
178
+
179
+ # Step 2: If it's still an R container, convert it
180
+ if isinstance(py_result, (NamedList, ListVector)):
181
+ py_result = r_namedlist_to_dict(py_result)
182
+
183
+ # Step 3: Recursively process any nested frames
184
+ return replace_r_na(_recursive_postprocess(py_result))
185
+
186
+ except KeyError:
187
+ raise ValueError(f"Function '{function_name}' not found in the R script.")
188
+ except Exception as e:
189
+ raise RuntimeError(f"Error calling R function '{function_name}': {e}")
190
+
191
+ @classmethod
192
+ def from_github(
193
+ cls,
194
+ repo: str,
195
+ file_path: str,
196
+ ref: str = "main",
197
+ token: Optional[str] = None,
198
+ cache_dir: Optional[Path] = None,
199
+ path_to_renv: Optional[Path] = None,
200
+ trust_remote_code: bool = False,
201
+ require_token: bool = False,
202
+ ) -> "RFunctionCaller | Path":
203
+ """
204
+ Download an R script from a GitHub repository and construct an RFunctionCaller.
205
+
206
+ Args:
207
+ repo: repository in the form "owner/repo".
208
+ file_path: path to the R script inside the repo (e.g. "scripts/my.R").
209
+ ref: branch name, tag or commit SHA. Defaults to "main".
210
+ token: optional GitHub token for private repos. If None, looks at
211
+ environment variables `GITHUB_TOKEN` or `GH_TOKEN`.
212
+ cache_dir: optional directory to cache downloaded files. Defaults to
213
+ `~/.cache/rpy-bridge`.
214
+ path_to_renv: optional path to renv or project directory to use.
215
+ trust_remote_code: MUST be True to execute remote code. If False,
216
+ the function will only return the local cached path.
217
+
218
+ Returns:
219
+ If `trust_remote_code` is True, returns an `RFunctionCaller` instance
220
+ ready to call functions from the downloaded script. Otherwise returns
221
+ the `Path` to the cached script so the caller can inspect it first.
222
+ """
223
+ raise NotImplementedError(
224
+ "RFunctionCaller.from_github was removed. Clone repositories locally and pass a local script_path to RFunctionCaller instead."
225
+ )
226
+
227
+
228
+ # %%
229
def r_namedlist_to_dict(namedlist: object) -> object:
    """
    Recursively turn an R list-like object into plain Python containers.

    - ``NamedList`` / ``ListVector`` with names become a ``dict`` (entries whose
      name is missing or empty are skipped); without names they become a list.
    - ``StrVector`` / ``IntVector`` / ``FloatVector`` / ``BoolVector`` become a
      ``dict`` keyed by name when names are present, otherwise a list.
    - Anything else is passed through the pandas-aware rpy2 converter; if that
      conversion raises, the object is returned unchanged.
    """

    def _names_of(r_obj):
        # ``names`` is an attribute on some rpy2 containers and a method on others.
        return r_obj.names() if callable(r_obj.names) else r_obj.names

    # --- list-like containers -------------------------------------------
    if isinstance(namedlist, (NamedList, ListVector)):
        labels = _names_of(namedlist)

        # No names at all: plain positional list
        if isinstance(labels, NULLType):
            return [r_namedlist_to_dict(item) for item in namedlist]

        converted = {}
        for label, item in zip(labels, namedlist):
            if label is None or isinstance(label, NULLType):
                continue
            label_text = str(label)
            if not label_text:
                continue
            converted[label_text] = r_namedlist_to_dict(item)
        return converted

    # --- atomic vectors (may carry names, e.g. c(a = 1, b = 2)) ----------
    if isinstance(namedlist, (StrVector, IntVector, FloatVector, BoolVector)):
        labels = _names_of(namedlist)
        if isinstance(labels, NULLType):
            return list(namedlist)

        by_name = {}
        for label, element in zip(labels, list(namedlist)):
            if label is not None and not isinstance(label, NULLType):
                by_name[str(label)] = element
        return by_name

    # --- everything else: try the pandas converter, else hand back as-is --
    with localconverter(robjects.default_converter + pandas2ri.converter):
        try:
            return robjects.conversion.rpy2py(namedlist)
        except Exception:
            return namedlist
279
+
280
+
281
+ # %%
282
def clean_r_dataframe(r_df: object) -> object:
    """
    Drop the ``.groups`` and ``.rows`` entries from ``r_df.attr`` and return ``r_df``.

    The object is modified in place. A missing entry, or an object without an
    ``attr`` mapping, is silently ignored.
    """
    removable = (".groups", ".rows")
    for attr_name in removable:
        try:
            del r_df.attr[attr_name]
        except (KeyError, AttributeError):
            # Nothing to remove for this attribute; move on to the next one.
            continue
    return r_df
293
+
294
+
295
+ # %%
296
def fix_string_nans(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with the strings "nan", "NaN", "NA", "na" and "" replaced by ``pd.NA``."""
    na_like_strings = ["nan", "NaN", "NA", "na", ""]
    cleaned = df.replace(na_like_strings, pd.NA)
    return cleaned
299
+
300
+
301
+ # %%
302
def replace_r_na(obj: object) -> object:
    """
    Recursively swap R's ``NA_Character`` for ``np.nan``.

    DataFrames are handled with ``DataFrame.replace``; dicts and lists are
    rebuilt with each value processed in turn; a bare ``NA_Character`` becomes
    ``np.nan``. Any other object is returned unchanged. If ``rpy2.robjects``
    does not expose ``NA_Character``, nothing is replaced.
    """
    na_available = hasattr(ro, "NA_Character")

    if isinstance(obj, pd.DataFrame):
        if not na_available:
            return obj
        return obj.replace({ro.NA_Character: np.nan}, regex=False)

    if isinstance(obj, dict):
        return {key: replace_r_na(value) for key, value in obj.items()}

    if isinstance(obj, list):
        return [replace_r_na(element) for element in obj]

    if na_available and obj is ro.NA_Character:
        return np.nan

    return obj
321
+
322
+
323
+ # %%
324
def normalize_single_df_dtypes(df: pd.DataFrame, min_numeric_fraction: float = 0.5) -> pd.DataFrame:
    """
    Normalise NA-like strings and coerce mostly-numeric object columns to numbers.

    Args:
        df: Input frame. It is not modified; a new frame is returned.
        min_numeric_fraction: An object column is replaced by its numeric
            coercion when at least this fraction of its non-missing values
            parse as numbers. Values that do not parse become NaN, so the
            default of 0.5 is lossy for mixed columns; pass ``1.0`` to convert
            only columns where every non-missing value is numeric.

    Returns:
        A new DataFrame with "", "nan", "NaN", "NA" and "na" replaced by
        ``pd.NA``, qualifying object columns converted to numeric, and integer
        columns that contain missing values cast to ``float64``.
    """
    df = df.replace(["", "nan", "NaN", "NA", "na"], pd.NA)

    for col in df.columns:
        series = df[col]

        # Try converting object/string columns to numeric if possible
        if pd.api.types.is_object_dtype(series):
            coerced = pd.to_numeric(series, errors="coerce")
            # Convert when enough of the non-missing values survived coercion;
            # the remaining (non-numeric) values are turned into NaN.
            if coerced.notna().sum() >= series.notna().sum() * min_numeric_fraction:
                df[col] = coerced

        # Cast integer columns with NA to float so missing values are NaN
        if pd.api.types.is_integer_dtype(df[col]) and df[col].isna().any():
            df[col] = df[col].astype("float64")

    return df
343
+
344
+
345
+ # %%
346
def fix_r_dataframe_types(df: pd.DataFrame) -> pd.DataFrame:
    """
    Post-process a DataFrame converted from R via rpy2:
    - Replaces R's NA_integer_ sentinel (-2147483648) with a missing value
    - Converts numeric columns that look like R dates into datetime
    - Converts timezone-aware datetimes to naive datetimes

    The date conversion is a heuristic: any numeric column whose non-missing
    values all lie between 10000 and 40000 is interpreted as days since
    1970-01-01.

    Note: columns are reassigned on ``df`` itself, so the frame passed in is
    modified; the same frame is returned.
    """
    r_na_integer = -2147483648

    for col in df.columns:
        series = df[col]

        # Fix R's NA_integer_ sentinel (-2147483648)
        if pd.api.types.is_integer_dtype(series):
            is_sentinel = series == r_na_integer
            if is_sentinel.any():
                # Keep working on the cleaned series so the checks below do
                # not see the sentinel value again.
                series = series.mask(is_sentinel, pd.NA)
                df[col] = series

        # Convert R-style date columns (days since 1970) to datetime
        if pd.api.types.is_numeric_dtype(series):
            values = series.dropna()
            if not values.empty and values.between(10000, 40000).all():
                try:
                    # "1970-01-01" is the reference date for Unix Epoch
                    series = pd.to_datetime("1970-01-01") + pd.to_timedelta(series, unit="D")
                    df[col] = series
                except (TypeError, ValueError, OverflowError):
                    # Best effort: leave the column numeric if it cannot be
                    # expressed as a timedelta.
                    pass

        # Remove timezone from datetime columns (e.g., POSIXct with tz)
        if isinstance(series.dtype, pd.DatetimeTZDtype):
            df[col] = series.dt.tz_localize(None)

    return df
376
+
377
+
378
+ # %%
379
def postprocess_r_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run the standard clean-up pipeline on a DataFrame converted from R.

    Applies, in order, ``fix_r_dataframe_types``, ``fix_string_nans`` and
    ``normalize_single_df_dtypes``. Afterwards, an object-dtype index whose
    labels are exactly "1", "2", ..., "n" is replaced by a 0-based
    ``RangeIndex``; any other index is left untouched.
    """
    for step in (fix_r_dataframe_types, fix_string_nans, normalize_single_df_dtypes):
        df = step(df)

    # Only object-dtype indexes are candidates for the R-style "1".."n" reset
    if df.index.dtype != object:
        return df

    try:
        numeric_labels = df.index.astype(int)
        one_based = np.arange(len(df)) + 1
        if (numeric_labels == one_based).all():
            df.index = pd.RangeIndex(start=0, stop=len(df))
    except Exception:
        pass  # leave index as-is if not convertible
    return df
393
+
394
+
395
+ # Note: GitHub fetch helpers were removed to keep the API focused on
396
+ # local script invocation. If you need to run remote scripts, clone the
397
+ # repository locally and pass the local `script_path` to `RFunctionCaller`.
398
+
399
+
400
+ # %%
401
+ # -------------------------------------------
402
+ # Functions here onwards are utility functions
403
+ # for comparing R and Python DataFrames.
404
+ # -------------------------------------------
405
+
406
+
407
def normalize_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Aligns column dtypes across two DataFrames for accurate comparison.
    - Replaces empty strings with pd.NA.
    - Attempts to coerce strings to numeric where applicable.
    - Aligns dtypes between matching columns (e.g. float64 vs int64).
    - Converts remaining object columns to strings while keeping missing
      values missing (they are not turned into "nan" / "<NA>" text).

    Only columns present in both frames are touched. Both frames are modified
    in place and returned; pass copies if the originals must be preserved.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_object = pd.api.types.is_object_dtype

    def _stringify_keep_na(series: pd.Series) -> pd.Series:
        # Stringify values but leave missing cells missing, so that NaN, None
        # and pd.NA in the two frames still compare as "both missing".
        return series.where(series.isna(), series.astype(str))

    for col in df1.columns.intersection(df2.columns):
        # Replace empty strings with NA
        df1[col] = df1[col].replace("", pd.NA)
        df2[col] = df2[col].replace("", pd.NA)

        s1, s2 = df1[col], df2[col]
        dtype1, dtype2 = s1.dtype, s2.dtype

        # If one is numeric and the other is object, try coercing both to numeric
        if (is_numeric(dtype1) and is_object(dtype2)) or (is_object(dtype1) and is_numeric(dtype2)):
            try:
                numeric1 = pd.to_numeric(s1, errors="coerce")
                numeric2 = pd.to_numeric(s2, errors="coerce")
            except (TypeError, ValueError):
                pass  # fall through to the blocks below if coercion fails
            else:
                df1[col] = numeric1
                df2[col] = numeric2
                continue  # skip to next column if coercion succeeds

        # If both are numeric (e.g., int vs float), unify to float64
        if is_numeric(dtype1) and is_numeric(dtype2):
            df1[col] = df1[col].astype("float64")
            df2[col] = df2[col].astype("float64")
            continue

        # If either side is object, compare both sides as strings
        if is_object(dtype1) or is_object(dtype2):
            df1[col] = _stringify_keep_na(df1[col])
            df2[col] = _stringify_keep_na(df2[col])

    return df1, df2
445
+
446
+
447
+ # %%
448
def align_numeric_dtypes(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Ensure aligned numeric dtypes between two DataFrames for accurate comparison.
    Converts between int, float, and numeric-looking strings where appropriate.
    Also handles NA and empty string normalization.

    A shared column is converted to ``float64`` in both frames only when the
    conversion is lossless on both sides, i.e. no non-missing value would be
    turned into NaN. Columns containing genuine text are therefore left as
    they are, so textual differences remain visible to a later comparison.

    Only columns present in both frames are touched. Both frames are modified
    in place and returned.
    """

    def _to_float_losslessly(series: pd.Series):
        """Return ``series`` as float64, or None if coercion fails or would discard a value."""
        try:
            numeric = pd.to_numeric(series, errors="coerce")
            # A cell that had a value before but is NaN afterwards was not numeric.
            if (numeric.isna() & series.notna()).any():
                return None
            return numeric.astype("float64")
        except Exception:
            # Best effort: anything that cannot be coerced is left untouched.
            return None

    for col in df1.columns.intersection(df2.columns):
        # Replace empty strings with NA to avoid type promotion issues
        s1 = df1[col].replace("", pd.NA)
        s2 = df2[col].replace("", pd.NA)

        s1_num = _to_float_losslessly(s1)
        s2_num = _to_float_losslessly(s2)

        both_numeric = s1_num is not None and s2_num is not None
        # Skip conversion when there is no actual number on either side
        if both_numeric and not (s1_num.isna().all() and s2_num.isna().all()):
            df1[col] = s1_num
            df2[col] = s2_num
            continue  # move to next column

        # Otherwise, keep the original values (with "" normalised to NA)
        df1[col] = s1
        df2[col] = s2

    return df1, df2
479
+
480
+
481
+ # %%
482
def compare_r_py_dataframes(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    float_tol: float = 1e-8,
    rel_tol: float = 1e-5,
) -> dict:
    """
    Compare a Python DataFrame (df1) with an R DataFrame converted to pandas (df2).

    Neither input frame is modified; all normalisation happens on copies.

    Args:
        df1: DataFrame produced on the Python side.
        df2: DataFrame converted from R; R-specific type fixes are applied to it.
        float_tol: Absolute tolerance used for numeric columns.
        rel_tol: Relative tolerance used for numeric columns. The default
            matches ``numpy.isclose``'s own default; pass ``0`` to compare
            with the absolute tolerance only.

    Returns:
        dict with mismatch diagnostics, preserving original indices in diffs:
        ``shape_mismatch``, ``columns_mismatch`` and ``index_mismatch`` flags,
        plus ``numeric_diffs`` and ``non_numeric_diffs`` mapping column name to
        a DataFrame with the differing ``df1`` / ``df2`` values.
    """

    results = {
        "shape_mismatch": False,
        "columns_mismatch": False,
        "index_mismatch": False,
        "numeric_diffs": {},
        "non_numeric_diffs": {},
    }

    # --- Preprocessing: fix R-specific issues ---
    # fix_r_dataframe_types reassigns columns on the frame it receives, so it
    # is given a copy to keep the caller's df2 untouched.
    df2 = fix_r_dataframe_types(df2.copy())

    # --- Replace common string NAs with proper pd.NA ---
    df1 = fix_string_nans(df1)
    df2 = fix_string_nans(df2)

    # --- Normalize and align dtypes ---
    df1, df2 = normalize_dtypes(df1.copy(), df2.copy())
    df1, df2 = align_numeric_dtypes(df1, df2)

    # --- Check shape ---
    if df1.shape != df2.shape:
        results["shape_mismatch"] = True
        print(f"[Warning] Shape mismatch: df1 {df1.shape} vs df2 {df2.shape}")

    # --- Check index ---
    if not df1.index.equals(df2.index):
        results["index_mismatch"] = True
        print("[Warning] Index mismatch between df1 and df2")

    # --- Check columns ---
    if set(df1.columns) != set(df2.columns):
        results["columns_mismatch"] = True
        print("[Warning] Column mismatch:")
        print(f"  df1: {df1.columns}")
        print(f"  df2: {df2.columns}")
        common_cols = df1.columns.intersection(df2.columns)
    else:
        common_cols = df1.columns

    # --- Ensure columns are the same order ---
    df1_aligned = df1.loc[:, common_cols]
    df2_aligned = df2.loc[:, common_cols]

    # --- Compare values column by column ---
    for col in common_cols:
        # Align on the index first so rows present on only one side show up
        # as a value-vs-missing difference in either branch.
        col_py, col_r = df1_aligned[col].align(df2_aligned[col])

        if pd.api.types.is_numeric_dtype(col_py) and pd.api.types.is_numeric_dtype(col_r):
            close = np.isclose(
                col_py.fillna(np.nan),
                col_r.fillna(np.nan),
                rtol=rel_tol,
                atol=float_tol,
                equal_nan=True,
            )
            if not close.all():
                diffs = pd.DataFrame(
                    {
                        "df1": col_py[~close],
                        "df2": col_r[~close],
                    }
                )
                results["numeric_diffs"][col] = diffs

        else:
            # Treat missing values as equal: create mask where values differ excluding matching NAs
            unequal = ~col_py.eq(col_r)
            both_na = col_py.isna() & col_r.isna()
            unequal = unequal & ~both_na

            if unequal.any():
                diffs = pd.DataFrame(
                    {
                        "df1": col_py[unequal],
                        "df2": col_r[unequal],
                    }
                )
                results["non_numeric_diffs"][col] = diffs

    return results
567
+
568
+
569
+ # %%
@@ -0,0 +1,205 @@
1
+ Metadata-Version: 2.4
2
+ Name: rpy-bridge
3
+ Version: 0.1.0
4
+ Summary: Bridge helpers for calling R from Python via rpy2
5
+ Author-email: Victoria Cheung <victoriakcheung@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Victoria Cheung
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Acknowledgement: This project builds on work originally developed at
29
+ Revolution Medicines and interfaces with the rpy2 project, which is licensed
30
+ under the GNU General Public License version 2 or later.
31
+
32
+ Project-URL: Homepage, https://github.com/vic-cheung/rpy-bridge
33
+ Project-URL: Issue Tracker, https://github.com/vic-cheung/rpy-bridge/issues
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
+ Requires-Python: >=3.11
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: numpy>=1.24
44
+ Requires-Dist: pandas>=2.0
45
+ Requires-Dist: rpy2>=3.5
46
+ Requires-Dist: loguru>=0.7
47
+ Requires-Dist: ipykernel>=7.1.0
48
+ Dynamic: license-file
49
+
50
+ # rpy-bridge
51
+
52
+ Utilities for calling R code from Python using `rpy2`. It provides a small
53
+ wrapper that can (optionally) activate an `renv` project, source an R
54
+ script, call functions from that script, and post-process results into
55
+ well-typed pandas `DataFrame` objects.
56
+
57
+ This project was developed for bilingual teams where some functions are
58
+ authored in R and the primary consumer is a Python-centric developer. It
59
+ acts as an interoperability layer so a Python programmer can call and reuse
60
+ R functions (written and maintained by R authors) without reimplementing
61
+ that logic in Python.
62
+
63
+ ## Installation
64
+
65
+ Prerequisites
66
+
67
+ - System R installed and available on `PATH` (rpy2 requires a working R
68
+ installation).
69
+ - Python 3.11+
70
+
71
+ Installation
72
+
73
+ Install from PyPI or as an editable local package during development:
74
+
75
+ ```bash
76
+ # From PyPI (recommended for consumers)
77
+ python3 -m pip install rpy-bridge
78
+
79
+ # During development (install editable from local source)
80
+ python3 -m pip install -e .
81
+ ```
82
+
83
+ Required Python packages (the installer will pull these in):
84
+
85
+ - `rpy2` (GPLv2 or later)
86
+ - `pandas`
87
+ - `numpy`
88
+
89
+ ## Usage
90
+
91
+ ```python
92
+ from pathlib import Path
93
+
94
+ from rpy_bridge import RFunctionCaller
95
+
96
+ caller = RFunctionCaller(
97
+ path_to_renv=Path("/path/to/project"),
98
+ script_path=Path("/path/to/script.R"),
99
+ )
100
+
101
+ summary_df = caller.call("summarize_cohort", cohort_df)
102
+ ```
103
+
104
+ ## Examples
105
+
106
+ Basic — run a local R script
107
+
108
+ ```python
109
+ from pathlib import Path
110
+ from rpy_bridge import RFunctionCaller
111
+
112
+ # If your project uses renv, pass the project directory (parent of renv/)
113
+ project_dir = Path("/path/to/your-r-project")
114
+ script = project_dir / "scripts" / "example.R"
115
+
116
+ # If you do not use renv, pass None for path_to_renv
117
+ caller = RFunctionCaller(path_to_renv=project_dir, script_path=script)
118
+ result = caller.call("some_function", 42, named_arg="value")
119
+ print(type(result))
120
+ ```
121
+
122
+ Notes:
123
+
124
+ `path_to_renv` may be either the project directory (containing `renv/`) or
125
+ the `renv/` directory itself. When provided, `RFunctionCaller` will call
126
+ `renv::load()` so the R session uses the project's library versions. If
127
+ `path_to_renv` is `None`, `rpy-bridge` will use whatever R environment is
128
+ visible to the Python process (system R or an R environment you activated
129
+ before starting Python).
130
+
131
+ The intended workflow is:
132
+
133
+ - Clone or download the R script into your local filesystem (review the
134
+ code if it came from a remote source).
135
+ - Construct an `RFunctionCaller` with `script_path` pointing to the local
136
+ script and optionally `path_to_renv` to activate the project's R library.
137
+
138
+ This keeps network, token, and SSL concerns outside the package while
139
+ preserving an easy path for Python-first users to call R-written functions.
140
+
141
+ If you need to run an R script from a remote repository, clone or download
142
+ the script locally, review it, and then construct an `RFunctionCaller`
143
+ pointing at the local `script_path`. This keeps network, token, and SSL
144
+ concerns outside the package and avoids environment-specific failures.
145
+
146
+ ```python
147
+ from rpy_bridge import RFunctionCaller
148
+
149
+ project_dir = Path("/path/to/cloned/repo")
150
+ script = project_dir / "scripts" / "analysis.R"
151
+
152
+ caller = RFunctionCaller(path_to_renv=None, script_path=script)
153
+ result = caller.call("analyze", some_arg=42)
154
+ ```
155
+
156
+ ## R Setup
157
+
158
+ If you plan to execute R code with `rpy-bridge`, use the helper scripts in
159
+ `examples/r-deps/` to prepare an R environment.
160
+
161
+ - On macOS (Homebrew) install system deps:
162
+
163
+ ```bash
164
+ bash examples/r-deps/install_r_dev_deps_homebrew.sh
165
+ ```
166
+
167
+ - Initialize a project `renv` (run in an R session):
168
+
169
+ ```r
170
+ source("examples/r-deps/setup_env.R")
171
+ ```
172
+
173
+ - Restore the environment on a new machine:
174
+
175
+ ```r
176
+ renv::restore()
177
+ ```
178
+
179
+ Review the scripts in `examples/r-deps/` before running; they install system
180
+ libraries and R packages and should be run from a trusted environment. For
181
+ CI, use `r-lib/actions/setup-r` to install R, then source the setup script
182
+ above (e.g. `Rscript examples/r-deps/setup_env.R`) to prepare the `renv` environment.
183
+
184
+ ## Collaboration note
185
+
186
+ This repository provides example R setup scripts for teams working across
187
+ Python and R. Each project may require different R packages — check the
188
+ package list in `examples/r-deps/setup_env.R` and commit a `renv.lock` for
189
+ project-specific reproducibility.
190
+
191
+ Clone repositories containing R scripts locally or use your
192
+ preferred tooling to obtain scripts before execution.
193
+
194
+ ## Licensing
195
+
196
+ - `rpy-bridge` is released under the MIT License © 2025 Victoria Cheung.
197
+ - The project depends on [`rpy2`](https://rpy2.github.io) which is licensed
198
+ under the GNU General Public License v2 (or later). Distributing binaries that
199
+ bundle `rpy2` must comply with the GPL terms. When you install `rpy-bridge`
200
+ as a dependency, `rpy2` is resolved directly from its upstream maintainers.
201
+
202
+ ### Thanks
203
+
204
+ This package was spun out of internal tooling at Revolution Medicines.
205
+ Many thanks to the team there for allowing the code to be open sourced.
@@ -0,0 +1,8 @@
1
+ rpy_bridge/__init__.py,sha256=56cw27pgg7ES9zl6-rwKz01Ky3MRvJX2dusBAjbGXM4,823
2
+ rpy_bridge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ rpy_bridge/rpy2_utils.py,sha256=13BDM0577R4cjZ06CSH0Glv0xkA9ZbaTe4gS9eno_ko,20536
4
+ rpy_bridge-0.1.0.dist-info/licenses/LICENSE,sha256=JwbWVcSfeoLfZ2M_ZiyygKVDvhBDW3zbqTWwXOJwmrA,1276
5
+ rpy_bridge-0.1.0.dist-info/METADATA,sha256=t7wkdtOKNvB4Ty2oPajgoJiqw88FW0AgTdaSDca4Jew,7229
6
+ rpy_bridge-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ rpy_bridge-0.1.0.dist-info/top_level.txt,sha256=z9UZ77ZuUPoLqMDQEpP4btstsaM1IpXb9Cn9yBVaHmU,11
8
+ rpy_bridge-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,25 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Victoria Cheung
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ Acknowledgement: This project builds on work originally developed at
24
+ Revolution Medicines and interfaces with the rpy2 project, which is licensed
25
+ under the GNU General Public License version 2 or later.
@@ -0,0 +1 @@
1
+ rpy_bridge