pylantir 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pylantir
-Version: 0.2.1
+Version: 0.2.3
 Summary: Python - DICOM Modality WorkList with Optional API
 Author-email: Milton Camacho <miltoncamachoicc@gmail.com>
 Requires-Python: >=3.11.1
@@ -86,13 +86,15 @@ pip install pylantir[api]
 ```
 Includes: FastAPI, Uvicorn, JWT authentication, password hashing
 
-#### Memory Monitoring
+#### Memory Monitoring (Recommended for Production)
 For enhanced memory usage monitoring and cleanup during REDCap synchronization:
 ```bash
 pip install pylantir[monitoring]
 ```
 Includes: psutil for system resource monitoring
 
+**Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
 #### Big Data Processing
 For Spark-based data processing capabilities:
 ```bash
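
The note added above has a concrete mechanical cause: without psutil, the portable fallback for measuring memory is `resource.getrusage(...).ru_maxrss`, which is a lifetime peak and can only grow. A minimal sketch of the distinction follows; the helper name `get_memory_usage` matches the function changed further down in this diff, but the body here is an assumption for illustration, not pylantir's actual implementation.

```python
# Sketch (assumed implementation): why logs differ with and without psutil.
import resource  # Unix-only; provides the peak ("high-water mark") RSS fallback

def get_memory_usage() -> dict:
    try:
        import psutil  # installed via: pip install pylantir[monitoring]
        rss = psutil.Process().memory_info().rss  # current RSS, in bytes
        return {"rss_mb": round(rss / 1024**2, 2)}  # can decrease after cleanup
    except ImportError:
        # ru_maxrss is a lifetime peak: it never drops, so "Freed" is invisible.
        # Units differ by platform: kilobytes on Linux, bytes on macOS.
        usage = resource.getrusage(resource.RUSAGE_SELF)
        return {"peak_rss": usage.ru_maxrss}
```

This also explains the `'rss_mb' in memory_before` check in the logging hunk below: only the psutil path reports a value that can meaningfully shrink.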
@@ -30,13 +30,15 @@ pip install pylantir[api]
 ```
 Includes: FastAPI, Uvicorn, JWT authentication, password hashing
 
-#### Memory Monitoring
+#### Memory Monitoring (Recommended for Production)
 For enhanced memory usage monitoring and cleanup during REDCap synchronization:
 ```bash
 pip install pylantir[monitoring]
 ```
 Includes: psutil for system resource monitoring
 
+**Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
 #### Big Data Processing
 For Spark-based data processing capabilities:
 ```bash
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "pylantir"
-version = "0.2.1"
+version = "0.2.3"
 authors = [
     {name = "Milton Camacho", email = "miltoncamachoicc@gmail.com"},
 ]
@@ -55,14 +55,23 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
     datetime_interval = datetime_now - timedelta(seconds=interval)
     records = project.export_records(fields=redcap_fields, date_begin=datetime_interval, date_end=datetime_now, format_type="df")
 
+    # Clean up PyCap Project immediately after export to free API client cache
+    del project
+    gc.collect()
+
     if records.empty:
         lgr.warning("No records retrieved from REDCap.")
+        # Explicitly clean up the empty DataFrame to release any allocated buffers
+        del records
+        gc.collect()
         return []
 
     filtered_records = []
 
     # Group by 'record_id' (index level 0)
-    for record_id, group in records.groupby(level=0):
+    # Convert to list to avoid holding groupby iterator reference
+    record_groups = list(records.groupby(level=0))
+    for record_id, group in record_groups:
 
         # Try to get baseline (non-repeated instrument) values
         baseline_rows = group[group['redcap_repeat_instrument'].isna()]
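
Two details in this hunk are worth spelling out: `del` only removes one reference, so the frame is freed only once nothing else points at it, and a live `groupby` iterator is exactly such a hidden reference, pinning `records` for the whole loop. A standalone sketch of the same pattern, with made-up data and names, assuming only pandas:

```python
# Sketch of the del-then-collect pattern above; data and names are illustrative.
import gc
import pandas as pd

def summarize(records: pd.DataFrame) -> list:
    # Materialize the groups so no live groupby iterator pins `records`.
    record_groups = list(records.groupby(level=0))
    results = [len(group) for _, group in record_groups]
    # Drop both local references before collecting; a surviving reference
    # anywhere would keep the underlying buffers alive despite gc.collect().
    del record_groups
    del records
    gc.collect()
    return results

df = pd.DataFrame({"v": range(6)}, index=[1, 1, 2, 2, 3, 3])
print(summarize(df))  # -> [2, 2, 2]
del df        # the caller's reference must also go before memory is returned
gc.collect()
```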
@@ -90,6 +99,11 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
 
         filtered_records.append(record)
 
+    # Explicitly clean up DataFrame and groupby list to free memory
+    del record_groups
+    del records
+    gc.collect()
+
     return filtered_records
 
 # TODO: Implement age binning for participants
@@ -164,11 +178,11 @@ def cleanup_memory_and_connections():
         lgr.debug("Disposing database connection pool")
         engine.pool.dispose()
 
-        # 3. Force Python garbage collection
-        # Run multiple times to catch circular references
-        collected = 0
-        for _ in range(3):
-            collected += gc.collect()
+        # 3. Force Python garbage collection targeting all generations
+        # Target generation 2 (oldest) first to catch long-lived objects
+        collected = gc.collect(generation=2)   # Oldest generation
+        collected += gc.collect(generation=1)  # Middle generation
+        collected += gc.collect(generation=0)  # Youngest generation
 
         # 4. Clear any cached SQLAlchemy metadata
         if hasattr(engine, 'pool'):
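
For context on the change above: CPython's collector is generational, with new objects in generation 0 and long-lived survivors in generation 2, and a `gc.collect(generation=2)` pass already sweeps the younger generations as well, so the follow-up calls for generations 1 and 0 mostly pick up objects that became unreachable during the first pass. A small sketch for observing the effect (counter values vary with interpreter state):

```python
# Sketch: watching CPython's generational counters around a full collection.
import gc

print(gc.get_count())                  # e.g. (412, 3, 1): pending per generation
collected = gc.collect(generation=2)   # full pass; younger generations included
print(f"collected {collected} objects")
print(gc.get_count())                  # counters reset after the pass
```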
@@ -178,20 +192,17 @@ def cleanup_memory_and_connections():
         # Get memory usage after cleanup
         memory_after = get_memory_usage()
 
-        # Log cleanup results
-        if memory_before and memory_after:
-            if 'rss_mb' in memory_before:
-                memory_freed = memory_before['rss_mb'] - memory_after['rss_mb']
-                lgr.info(f"Memory cleanup completed. "
-                         f"Before: {memory_before['rss_mb']}MB, "
-                         f"After: {memory_after['rss_mb']}MB, "
-                         f"Freed: {memory_freed:.2f}MB, "
-                         f"Collected {collected} objects")
-            else:
-                lgr.info(f"Memory cleanup completed. Collected {collected} objects. "
-                         f"Memory stats: {memory_after}")
+        # Log cleanup results with simplified, focused metrics
+        if memory_before and memory_after and 'rss_mb' in memory_before:
+            freed = memory_before['rss_mb'] - memory_after['rss_mb']
+            lgr.info(
+                f"Memory cleanup: Before={memory_before['rss_mb']:.1f}MB, "
+                f"After={memory_after['rss_mb']:.1f}MB, "
+                f"Freed={freed:.1f}MB, "
+                f"Objects={collected}"
+            )
         else:
-            lgr.info(f"Memory cleanup completed. Collected {collected} objects")
+            lgr.info(f"Memory cleanup: Collected {collected} objects")
 
     except Exception as e:
         lgr.error(f"Error during cleanup: {e}")
@@ -328,6 +339,8 @@ def sync_redcap_to_db(
     finally:
         # Always ensure session is properly closed
         if session:
+            # Detach all ORM objects from session to clear identity map
+            session.expunge_all()
             session.close()
 
     # Perform cleanup after sync
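
The `expunge_all()` call detaches every instance tracked in the session's identity map, so ORM objects loaded during the sync become collectable instead of staying pinned until the session object itself dies. A minimal standalone sketch of the same teardown order, using an in-memory SQLite engine and a made-up `Item` model (both assumptions for illustration, SQLAlchemy 2.0 style):

```python
# Sketch of the expunge-then-close teardown; engine URL and model are illustrative.
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Item(Base):
    __tablename__ = "items"
    id: Mapped[int] = mapped_column(primary_key=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

session = Session(engine)
try:
    session.add(Item(id=1))
    session.commit()
    items = session.query(Item).all()  # instances now sit in the identity map
finally:
    if session:
        session.expunge_all()  # detach ORM objects so they can be collected
        session.close()
```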