pylantir 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylantir/redcap_to_db.py CHANGED
@@ -1,6 +1,15 @@
+ """
+ LEGACY MODULE - Backward Compatibility Wrapper for REDCapPlugin
+
+ This module provides backward-compatible function signatures that internally
+ delegate to the new plugin-based architecture. Existing code can continue
+ calling these functions without modification.
+
+ MIGRATION PATH: New code should use src/pylantir/data_sources/redcap_plugin.py
+ directly instead of these legacy wrappers.
+ """
  import os
  import logging
- import pandas as pd
  from redcap import Project
  import uuid
  from sqlalchemy.orm import sessionmaker
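A minimal sketch of the compatibility contract the new module docstring describes; the field names and interval value are illustrative, while the import paths are the ones named in the diff:

```python
# Legacy code keeps working through the wrapper:
from pylantir.redcap_to_db import fetch_redcap_entries

entries = fetch_redcap_entries(["mri_date", "mri_time"], interval=60.0)

# New code targets the plugin module directly, per the MIGRATION PATH note:
from pylantir.data_sources.redcap_plugin import REDCapPlugin
```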
@@ -13,6 +22,13 @@ import gc
 
  lgr = logging.getLogger(__name__)
 
+ # Import the new plugin system
+ from .data_sources.redcap_plugin import REDCapPlugin
+ from .data_sources.base import PluginError
+
+ # NOTE: pandas import removed - we use native Python dicts/lists to avoid
+ # DataFrame memory overhead (50-100x memory reduction per sync cycle)
+
  # Optional memory monitoring (install with: pip install psutil)
  try:
      import psutil
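The pandas removal means each REDCap record now travels as a plain dict. A hypothetical shape of one returned entry, inferred from the fields this module reads later (the exact schema is defined by REDCapPlugin, which is outside this file):

```python
# Illustrative entry produced by fetch_redcap_entries() after the
# pandas removal; field names are taken from this diff:
entry = {
    "record_id": "001",
    "mri_date": "2025-01-15",
    "mri_time": "09:30",
    "modality": "MR",
}
# A list of such plain dicts replaces the per-sync DataFrame, avoiding the
# index and buffer overhead pandas carried for small result sets.
```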
@@ -33,80 +49,71 @@ Session = sessionmaker(bind=engine)
 
 
  def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
-     """Fetch REDCap entries using PyCap and return a list of filtered dicts."""
-     project = Project(REDCAP_API_URL, REDCAP_API_TOKEN)
+     """
+     LEGACY WRAPPER: Fetch REDCap entries using PyCap and return a list of filtered dicts.
 
-     if not redcap_fields:
-         lgr.error("No field mapping (redcap2wl) provided for REDCap retrieval.")
-         return []
+     **DEPRECATION NOTICE**: This function is deprecated and maintained only for
+     backward compatibility. New code should use REDCapPlugin.fetch_entries() directly
+     from src/pylantir/data_sources/redcap_plugin.py.
 
-     # Fetch metadata to get valid REDCap field names
-     valid_fields = {field["field_name"] for field in project.export_metadata()}
-     redcap_fields = [field for field in redcap_fields if field in valid_fields]
+     This function now delegates to REDCapPlugin for consistency with the new
+     plugin architecture. Existing callers can continue using this signature.
 
-     if not redcap_fields:
-         lgr.error("No valid REDCap fields found in provided mapping.")
-         return []
+     Args:
+         redcap_fields: List of REDCap field names to fetch
+         interval: Time window in seconds to fetch records from
 
-     lgr.info(f"Fetching REDCap data for fields: {redcap_fields}")
+     Returns:
+         List of filtered MRI record dictionaries
 
-     # Export data
-     datetime_now = datetime.now()
-     datetime_interval = datetime_now - timedelta(seconds=interval)
-     records = project.export_records(fields=redcap_fields, date_begin=datetime_interval, date_end=datetime_now, format_type="df")
+     MIGRATION PATH: Use REDCapPlugin directly:
+     ```python
+     from pylantir.data_sources.redcap_plugin import REDCapPlugin
+     plugin = REDCapPlugin()
+     entries = plugin.fetch_entries(field_mapping=field_mapping, interval=interval)
+     ```
+     """
+     lgr.warning(
+         "fetch_redcap_entries() is deprecated. "
+         "Use REDCapPlugin from src/pylantir/data_sources/redcap_plugin.py instead."
+     )
 
-     # Clean up PyCap Project immediately after export to free API client cache
-     del project
-     gc.collect()
+     try:
+         # Build a minimal plugin configuration for legacy calls
+         config = {
+             "site_id": "default",  # Legacy calls don't have a site_id
+             "protocol": "DEFAULT_PROTOCOL"
+         }
 
-     if records.empty:
-         lgr.warning("No records retrieved from REDCap.")
-         # Explicitly clean up the empty DataFrame to release any allocated buffers
-         del records
-         gc.collect()
-         return []
+         # Create field mapping for the plugin (maps each REDCap field name to itself)
+         field_mapping = {field: field for field in redcap_fields}
 
-     filtered_records = []
-
-     # Group by 'record_id' (index level 0)
-     # Convert to list to avoid holding groupby iterator reference
-     record_groups = list(records.groupby(level=0))
-     for record_id, group in record_groups:
-
-         # Try to get baseline (non-repeated instrument) values
-         baseline_rows = group[group['redcap_repeat_instrument'].isna()]
-         baseline_row = baseline_rows.iloc[0] if not baseline_rows.empty else {}
-
-         # Filter for valid MRI rows only
-         mri_rows = group[
-             (group["redcap_repeat_instrument"] == "mri") &
-             (group.get("mri_instance").notna()) &
-             (group.get("mri_instance") != "") &
-             (group.get("mri_date").notna()) &
-             (group.get("mri_time").notna())
-         ]
-
-         for _, mri_row in mri_rows.iterrows():
-             record = {"record_id": record_id}
-
-             # Merge fields from baseline and mri_row, only include requested fields
-             for field in redcap_fields:
-                 record[field] = (
-                     mri_row.get(field)
-                     if pd.notna(mri_row.get(field))
-                     else baseline_row.get(field)
-                 )
+         # Instantiate plugin (no constructor arguments)
+         plugin = REDCapPlugin()
+
+         # Validate configuration
+         is_valid, error_msg = plugin.validate_config(config)
+         if not is_valid:
+             lgr.error(f"Plugin configuration validation failed: {error_msg}")
+             return []
+
+         # Fetch entries using the plugin
+         datetime_now = datetime.now()
+         datetime_interval = datetime_now - timedelta(seconds=interval)
 
-             filtered_records.append(record)
+         entries = plugin.fetch_entries(field_mapping=field_mapping, interval=interval)
 
-     # Explicitly clean up DataFrame and groupby list to free memory
-     del record_groups
-     del records
-     gc.collect()
-
-     return filtered_records
+         # Clean up plugin resources
+         plugin.cleanup()
 
-     # TODO: Implement age binning for paricipants
+         return entries
+
+     except PluginError as e:
+         lgr.error(f"Plugin error in legacy fetch_redcap_entries: {e}")
+         return []
+     except Exception as e:
+         lgr.error(f"Unexpected error in legacy fetch_redcap_entries: {e}")
+         return []  # TODO: Implement age binning for participants
 
  def age_binning():
      return None
 
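The wrapper's delegation contract in miniature; the names come from the diff above, while REDCapPlugin's internals remain assumptions since they live outside this file:

```python
redcap_fields = ["mri_date", "mri_time", "mri_instance"]

# Identity mapping: the plugin returns the same field names the legacy
# caller requested.
field_mapping = {field: field for field in redcap_fields}
assert field_mapping["mri_date"] == "mri_date"

# On PluginError (or any unexpected exception) the wrapper logs the error
# and returns [], preserving the legacy function's failure mode.
```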
@@ -163,35 +170,35 @@ def cleanup_memory_and_connections():
      This function should be called after each synchronization cycle.
      """
      lgr.debug("Starting memory and connection cleanup...")
-
+
      # Get memory usage before cleanup
      memory_before = get_memory_usage()
-
+
      try:
          # 1. Clear pandas cache and temporary objects
          # Force garbage collection of pandas objects
          gc.collect()
-
+
          # 2. Close any idle database connections in the pool
          if hasattr(engine, 'pool'):
              # Dispose of the connection pool to free up connections
              lgr.debug("Disposing database connection pool")
              engine.pool.dispose()
-
+
          # 3. Force Python garbage collection targeting all generations
          # Target generation 2 (oldest) first to catch long-lived objects
          collected = gc.collect(generation=2)  # Oldest generation
          collected += gc.collect(generation=1)  # Middle generation
          collected += gc.collect(generation=0)  # Youngest generation
-
+
          # 4. Clear any cached SQLAlchemy metadata
          if hasattr(engine, 'pool'):
              # Recreate the pool with fresh connections
              engine.pool.recreate()
-
+
          # Get memory usage after cleanup
          memory_after = get_memory_usage()
-
+
          # Log cleanup results with simplified, focused metrics
          if memory_before and memory_after and 'rss_mb' in memory_before:
              freed = memory_before['rss_mb'] - memory_after['rss_mb']
@@ -203,7 +210,7 @@ def cleanup_memory_and_connections():
              )
          else:
              lgr.info(f"Memory cleanup: Collected {collected} objects")
-
+
      except Exception as e:
          lgr.error(f"Error during cleanup: {e}")
          # Don't let cleanup errors stop the main process
@@ -215,8 +222,29 @@ def sync_redcap_to_db(
      protocol: dict,
      redcap2wl: dict,
      interval: float = 60.0,
+     source_name: str = None,
  ) -> None:
-     """Sync REDCap patient data with the worklist database."""
+     """
+     LEGACY WRAPPER: Sync REDCap patient data with the worklist database.
+
+     **DEPRECATION NOTICE**: This function is deprecated and maintained only for
+     backward compatibility. New code should use the plugin-based architecture
+     from src/pylantir/data_sources/.
+
+     NOTE: This function now uses REDCapPlugin internally via the
+     fetch_redcap_entries() wrapper, ensuring consistent behavior with the new
+     plugin architecture.
+
+     Args:
+         site_id: Site identifier
+         protocol: Protocol mapping dictionary
+         redcap2wl: Field mapping dictionary
+         interval: Sync interval in seconds
+         source_name: Optional data source name for tracking (new in v0.3.0)
+     """
+     lgr.warning(
+         "sync_redcap_to_db() is deprecated. "
+         "Use data_sources configuration with REDCapPlugin instead."
+     )
 
      if not redcap2wl:
          lgr.error("No field mapping (redcap2wl) provided for syncing.")
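A hypothetical legacy call using the new optional `source_name` parameter; the `protocol` and `redcap2wl` values here are illustrative, not an authoritative schema:

```python
sync_redcap_to_db(
    site_id="site_01",
    protocol={"protocol_name": "brain_mri"},
    redcap2wl={"mri_date": "scheduled_start_date"},
    interval=60.0,
    source_name="redcap_main",  # tags worklist rows with their origin
)
# Omitting source_name keeps pre-0.3.0 behavior: existing rows are left
# untouched and new rows get data_source=None.
```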
@@ -241,6 +269,7 @@ def sync_redcap_to_db(
          if i not in redcap_fields:
              redcap_fields.append(i)
 
+     # NOTE: fetch_redcap_entries() now delegates to REDCapPlugin internally
      redcap_entries = fetch_redcap_entries(redcap_fields, interval)
 
      for record in redcap_entries:
@@ -290,6 +319,9 @@ def sync_redcap_to_db(
              existing_entry.modality = record.get("modality", "MR")
              existing_entry.scheduled_start_date = record.get("mri_date")
              existing_entry.scheduled_start_time = record.get("mri_time")
+             # Track data source if provided
+             if source_name:
+                 existing_entry.data_source = source_name
              # Dynamically update DICOM worklist fields from REDCap
              for redcap_field, dicom_field in redcap2wl.items():
                  if redcap_field in record:
@@ -324,13 +356,14 @@ def sync_redcap_to_db(
                  # performing_physician=record.get("performing_physician"),
                  study_description=record.get("study_description", "CPIP"),
                  # station_name=record.get("station_name"),
-                 performed_procedure_step_status="SCHEDULED"
+                 performed_procedure_step_status="SCHEDULED",
+                 data_source=source_name  # Track which source created this entry
              )
              session.add(new_entry)
 
          session.commit()
          logging.info("REDCap data synchronized successfully with DICOM worklist database.")
-
+
      except Exception as e:
          lgr.error(f"Error during REDCap synchronization: {e}")
          if session:
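Both `data_source` assignments above presuppose a `data_source` column on the worklist model, which lives outside this diff. A minimal sketch of the assumed schema addition, with illustrative names:

```python
from sqlalchemy import Column, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class WorklistEntry(Base):  # hypothetical name, not from this diff
    __tablename__ = "worklist"
    record_id = Column(String, primary_key=True)
    data_source = Column(String, nullable=True)  # None for pre-0.3.0 rows
```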
@@ -342,10 +375,10 @@ def sync_redcap_to_db(
          # Detach all ORM objects from session to clear identity map
          session.expunge_all()
          session.close()
-
+
          # Perform cleanup after sync
          cleanup_memory_and_connections()
-
+
          # Log memory usage after cleanup
          memory_after = get_memory_usage()
          if memory_after:
@@ -358,12 +391,22 @@ def sync_redcap_to_db_repeatedly(
      redcap2wl=None,
      interval=60,
      operation_interval={"start_time": [00,00], "end_time": [23,59]},
+     source_name=None,
  ):
      """
-     Keep syncing with REDCap in a loop every `interval` seconds,
-     but only between operation_interval[start_time] and operation_interval[end_time].
-     Exit cleanly when STOP_EVENT is set.
+     LEGACY WRAPPER: Keep syncing with REDCap in a loop every `interval` seconds.
+
+     **DEPRECATION NOTICE**: This function is deprecated and maintained only for
+     backward compatibility. New code should use the plugin-based multi-source
+     orchestration from src/pylantir/cli/run.py.
+
+     MIGRATION PATH: Configure the data_sources array in mwl_config.json and use
+     the new orchestration system.
      """
+     lgr.warning(
+         "sync_redcap_to_db_repeatedly() is deprecated. "
+         "Use data_sources configuration with multi-source orchestration instead."
+     )
      if operation_interval is None:
          operation_interval = {"start_time": [0, 0], "end_time": [23, 59]}
 
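The migration path names a data_sources array in mwl_config.json, but its schema is not shown in this diff. A guess at a plausible configuration, written as the dict that parsing the JSON file would return; the real schema is defined by pylantir's orchestration code and may differ:

```python
config = {
    "data_sources": [
        {
            "type": "redcap",
            "name": "redcap_main",   # becomes source_name on worklist rows
            "site_id": "site_01",
            "protocol": "DEFAULT_PROTOCOL",
            "interval": 60,
        }
    ]
}
```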
@@ -399,6 +442,8 @@ def sync_redcap_to_db_repeatedly(
              dt_end_yesterday = datetime.combine(yesterday, end_time)
              dt_start_today = datetime.combine(today_date, start_time)
              delta = dt_start_today - dt_end_yesterday
+             # Temporary large interval to catch up on missed data:
+             # delta = delta + timedelta(seconds=6000000)
              # guaranteed to be positive if yesterday < today
              extended_interval = delta.total_seconds()
              logging.info(f"Using extended interval: {extended_interval}, {interval} seconds until next sync.")
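A quick worked example of the extended-interval computation above, with illustrative dates and an operation window that closed at 23:59 yesterday and reopens at 06:00 today:

```python
from datetime import date, datetime, time

dt_end_yesterday = datetime.combine(date(2025, 1, 14), time(23, 59))
dt_start_today = datetime.combine(date(2025, 1, 15), time(6, 0))

delta = dt_start_today - dt_end_yesterday
print(delta.total_seconds())  # 21660.0 -> the day's first sync covers the 6h01m gap
```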
@@ -417,6 +462,7 @@ def sync_redcap_to_db_repeatedly(
                      protocol=protocol,
                      redcap2wl=redcap2wl,
                      interval=extended_interval,
+                     source_name=source_name,
                  )
              else:
                  sync_redcap_to_db(
@@ -424,6 +470,7 @@ def sync_redcap_to_db_repeatedly(
                      protocol=protocol,
                      redcap2wl=redcap2wl,
                      interval=interval_sync,
+                     source_name=source_name,
                  )
              last_sync_date = today_date
              logging.debug(f"REDCap sync completed at {now_time}. Next sync attempt in {interval} seconds.")
@@ -440,7 +487,7 @@ def sync_redcap_to_db_repeatedly(
                  f"Current time {now_time} is outside operation window "
                  f"({start_time}–{end_time}). Sleeping for {interval} seconds."
              )
-
+
              # Run periodic cleanup even during off-hours to prevent memory buildup
              # Run once per day to avoid excessive overhead
              if (now_dt.hour == 3 and now_dt.minute == 0):  # Daily cleanup at 3 AM