pylantir 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pylantir
-Version: 0.2.1
+Version: 0.2.3
 Summary: Python - DICOM Modality WorkList with Optional API
 Author-email: Milton Camacho <miltoncamachoicc@gmail.com>
 Requires-Python: >=3.11.1
@@ -86,13 +86,15 @@ pip install pylantir[api]
 ```
 Includes: FastAPI, Uvicorn, JWT authentication, password hashing
 
-#### Memory Monitoring
+#### Memory Monitoring (Recommended for Production)
 For enhanced memory usage monitoring and cleanup during REDCap synchronization:
 ```bash
 pip install pylantir[monitoring]
 ```
 Includes: psutil for system resource monitoring
 
+**Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
 #### Big Data Processing
 For Spark-based data processing capabilities:
 ```bash
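
The note added above has a concrete mechanical cause: without psutil, the portable fallback for measuring memory is `resource.getrusage(...).ru_maxrss`, which is a lifetime peak and can only grow. A minimal sketch of the distinction follows; the helper name `get_memory_usage` matches the function changed further down in this diff, but the body here is an assumption for illustration, not pylantir's actual implementation.

```python
# Sketch (assumed implementation): why logs differ with and without psutil.
import resource  # Unix-only; provides the peak ("high-water mark") RSS fallback

def get_memory_usage() -> dict:
    try:
        import psutil  # installed via: pip install pylantir[monitoring]
        rss = psutil.Process().memory_info().rss  # current RSS, in bytes
        return {"rss_mb": round(rss / 1024**2, 2)}  # can decrease after cleanup
    except ImportError:
        # ru_maxrss is a lifetime peak: it never drops, so "Freed" is invisible.
        # Units differ by platform: kilobytes on Linux, bytes on macOS.
        usage = resource.getrusage(resource.RUSAGE_SELF)
        return {"peak_rss": usage.ru_maxrss}
```

This also explains the `'rss_mb' in memory_before` check in the logging hunk below: only the psutil path reports a value that can meaningfully shrink.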
@@ -30,13 +30,15 @@ pip install pylantir[api]
 ```
 Includes: FastAPI, Uvicorn, JWT authentication, password hashing
 
-#### Memory Monitoring
+#### Memory Monitoring (Recommended for Production)
 For enhanced memory usage monitoring and cleanup during REDCap synchronization:
 ```bash
 pip install pylantir[monitoring]
 ```
 Includes: psutil for system resource monitoring
 
+**Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
 #### Big Data Processing
 For Spark-based data processing capabilities:
 ```bash
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "pylantir"
-version = "0.2.1"
+version = "0.2.3"
 authors = [
     {name = "Milton Camacho", email = "miltoncamachoicc@gmail.com"},
 ]
@@ -55,14 +55,23 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
     datetime_interval = datetime_now - timedelta(seconds=interval)
     records = project.export_records(fields=redcap_fields, date_begin=datetime_interval, date_end=datetime_now, format_type="df")
 
+    # Clean up PyCap Project immediately after export to free API client cache
+    del project
+    gc.collect()
+
     if records.empty:
         lgr.warning("No records retrieved from REDCap.")
+        # Explicitly clean up the empty DataFrame to release any allocated buffers
+        del records
+        gc.collect()
         return []
 
     filtered_records = []
 
     # Group by 'record_id' (index level 0)
-    for record_id, group in records.groupby(level=0):
+    # Convert to list to avoid holding groupby iterator reference
+    record_groups = list(records.groupby(level=0))
+    for record_id, group in record_groups:
 
         # Try to get baseline (non-repeated instrument) values
         baseline_rows = group[group['redcap_repeat_instrument'].isna()]
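
Two details in this hunk are worth spelling out: `del` only removes one reference, so the frame is freed only once nothing else points at it, and a live `groupby` iterator is exactly such a hidden reference, pinning `records` for the whole loop. A standalone sketch of the same pattern, with made-up data and names, assuming only pandas:

```python
# Sketch of the del-then-collect pattern above; data and names are illustrative.
import gc
import pandas as pd

def summarize(records: pd.DataFrame) -> list:
    # Materialize the groups so no live groupby iterator pins `records`.
    record_groups = list(records.groupby(level=0))
    results = [len(group) for _, group in record_groups]
    # Drop both local references before collecting; a surviving reference
    # anywhere would keep the underlying buffers alive despite gc.collect().
    del record_groups
    del records
    gc.collect()
    return results

df = pd.DataFrame({"v": range(6)}, index=[1, 1, 2, 2, 3, 3])
print(summarize(df))  # -> [2, 2, 2]
del df        # the caller's reference must also go before memory is returned
gc.collect()
```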
@@ -90,6 +99,11 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
 
         filtered_records.append(record)
 
+    # Explicitly clean up DataFrame and groupby list to free memory
+    del record_groups
+    del records
+    gc.collect()
+
     return filtered_records
 
 # TODO: Implement age binning for participants
@@ -164,11 +178,11 @@ def cleanup_memory_and_connections():
         lgr.debug("Disposing database connection pool")
         engine.pool.dispose()
 
-        # 3. Force Python garbage collection
-        # Run multiple times to catch circular references
-        collected = 0
-        for _ in range(3):
-            collected += gc.collect()
+        # 3. Force Python garbage collection targeting all generations
+        # Target generation 2 (oldest) first to catch long-lived objects
+        collected = gc.collect(generation=2)   # Oldest generation
+        collected += gc.collect(generation=1)  # Middle generation
+        collected += gc.collect(generation=0)  # Youngest generation
 
         # 4. Clear any cached SQLAlchemy metadata
         if hasattr(engine, 'pool'):
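
For context on the change above: CPython's collector is generational, with new objects in generation 0 and long-lived survivors in generation 2, and a `gc.collect(generation=2)` pass already sweeps the younger generations as well, so the follow-up calls for generations 1 and 0 mostly pick up objects that became unreachable during the first pass. A small sketch for observing the effect (counter values vary with interpreter state):

```python
# Sketch: watching CPython's generational counters around a full collection.
import gc

print(gc.get_count())                  # e.g. (412, 3, 1): pending per generation
collected = gc.collect(generation=2)   # full pass; younger generations included
print(f"collected {collected} objects")
print(gc.get_count())                  # counters reset after the pass
```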
@@ -178,20 +192,17 @@ def cleanup_memory_and_connections():
         # Get memory usage after cleanup
         memory_after = get_memory_usage()
 
-        # Log cleanup results
-        if memory_before and memory_after:
-            if 'rss_mb' in memory_before:
-                memory_freed = memory_before['rss_mb'] - memory_after['rss_mb']
-                lgr.info(f"Memory cleanup completed. "
-                         f"Before: {memory_before['rss_mb']}MB, "
-                         f"After: {memory_after['rss_mb']}MB, "
-                         f"Freed: {memory_freed:.2f}MB, "
-                         f"Collected {collected} objects")
-            else:
-                lgr.info(f"Memory cleanup completed. Collected {collected} objects. "
-                         f"Memory stats: {memory_after}")
+        # Log cleanup results with simplified, focused metrics
+        if memory_before and memory_after and 'rss_mb' in memory_before:
+            freed = memory_before['rss_mb'] - memory_after['rss_mb']
+            lgr.info(
+                f"Memory cleanup: Before={memory_before['rss_mb']:.1f}MB, "
+                f"After={memory_after['rss_mb']:.1f}MB, "
+                f"Freed={freed:.1f}MB, "
+                f"Objects={collected}"
+            )
         else:
-            lgr.info(f"Memory cleanup completed. Collected {collected} objects")
+            lgr.info(f"Memory cleanup: Collected {collected} objects")
 
     except Exception as e:
         lgr.error(f"Error during cleanup: {e}")
@@ -328,6 +339,8 @@ def sync_redcap_to_db(
     finally:
         # Always ensure session is properly closed
         if session:
+            # Detach all ORM objects from session to clear identity map
+            session.expunge_all()
             session.close()
 
     # Perform cleanup after sync
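
The `expunge_all()` call detaches every instance tracked in the session's identity map, so ORM objects loaded during the sync become collectable instead of staying pinned until the session object itself dies. A minimal standalone sketch of the same teardown order, using an in-memory SQLite engine and a made-up `Item` model (both assumptions for illustration, SQLAlchemy 2.0 style):

```python
# Sketch of the expunge-then-close teardown; engine URL and model are illustrative.
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Item(Base):
    __tablename__ = "items"
    id: Mapped[int] = mapped_column(primary_key=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

session = Session(engine)
try:
    session.add(Item(id=1))
    session.commit()
    items = session.query(Item).all()  # instances now sit in the identity map
finally:
    if session:
        session.expunge_all()  # detach ORM objects so they can be collected
        session.close()
```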