pylantir 0.2.1.tar.gz → 0.2.2.tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pylantir
- Version: 0.2.1
+ Version: 0.2.2
  Summary: Python - DICOM Modality WorkList with Optional API
  Author-email: Milton Camacho <miltoncamachoicc@gmail.com>
  Requires-Python: >=3.11.1
@@ -86,13 +86,15 @@ pip install pylantir[api]
  ```
  Includes: FastAPI, Uvicorn, JWT authentication, password hashing

- #### Memory Monitoring
+ #### Memory Monitoring (Recommended for Production)
  For enhanced memory usage monitoring and cleanup during REDCap synchronization:
  ```bash
  pip install pylantir[monitoring]
  ```
  Includes: psutil for system resource monitoring

+ **Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
  #### Big Data Processing
  For Spark-based data processing capabilities:
  ```bash
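The "high-water mark" behaviour described in the new note comes from the stdlib fallback: without psutil, the only portable figure on Unix is `resource.getrusage(...).ru_maxrss`, which reports *peak* RSS and by definition never decreases, so cleanup looks ineffective in the logs. A minimal sketch of the two code paths (the helper name is illustrative, not necessarily pylantir's actual implementation):

```python
import sys

def get_rss_mb() -> float:
    """Current RSS in MB with psutil; otherwise peak RSS (high-water mark)."""
    try:
        import psutil  # installed via the [monitoring] extra
        return psutil.Process().memory_info().rss / (1024 * 1024)
    except ImportError:
        import resource  # Unix-only stdlib fallback
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        # ru_maxrss is the peak RSS: kilobytes on Linux, bytes on macOS.
        return peak / (1024 * 1024) if sys.platform == "darwin" else peak / 1024
```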
@@ -30,13 +30,15 @@ pip install pylantir[api]
  ```
  Includes: FastAPI, Uvicorn, JWT authentication, password hashing

- #### Memory Monitoring
+ #### Memory Monitoring (Recommended for Production)
  For enhanced memory usage monitoring and cleanup during REDCap synchronization:
  ```bash
  pip install pylantir[monitoring]
  ```
  Includes: psutil for system resource monitoring

+ **Note**: While memory cleanup functions work without psutil, you need it installed to see cleanup effectiveness in logs. Without psutil, logs will show high-water mark memory values that don't decrease, even though cleanup is working. For production deployments, installing `[monitoring]` is **highly recommended** to validate memory stability.
+
  #### Big Data Processing
  For Spark-based data processing capabilities:
  ```bash
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"

  [project]
  name = "pylantir"
- version = "0.2.1"
+ version = "0.2.2"
  authors = [
      {name = "Milton Camacho", email = "miltoncamachoicc@gmail.com"},
  ]
@@ -55,6 +55,10 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
      datetime_interval = datetime_now - timedelta(seconds=interval)
      records = project.export_records(fields=redcap_fields, date_begin=datetime_interval, date_end=datetime_now, format_type="df")

+     # Clean up PyCap Project immediately after export to free API client cache
+     del project
+     gc.collect()
+
      if records.empty:
          lgr.warning("No records retrieved from REDCap.")
          return []
@@ -62,7 +66,9 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
      filtered_records = []

      # Group by 'record_id' (index level 0)
-     for record_id, group in records.groupby(level=0):
+     # Convert to list to avoid holding groupby iterator reference
+     record_groups = list(records.groupby(level=0))
+     for record_id, group in record_groups:

          # Try to get baseline (non-repeated instrument) values
          baseline_rows = group[group['redcap_repeat_instrument'].isna()]
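Materializing the lazy `DataFrameGroupBy` into a plain list makes the grouping's lifetime explicit: a named `record_groups` can be deleted by name in the cleanup hunk below, whereas the anonymous iterator in the old loop header lived until the frame exited. The tradeoff is that all `(record_id, group)` pairs are built up front, presumably acceptable here because the export window only covers recent records.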
@@ -90,6 +96,11 @@ def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:

          filtered_records.append(record)

+     # Explicitly clean up DataFrame and groupby list to free memory
+     del record_groups
+     del records
+     gc.collect()
+
      return filtered_records

  # TODO: Implement age binning for participants
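Why both `del` and `gc.collect()`: `del` only removes the name binding, and the object is freed once its reference count reaches zero; the explicit collection sweeps anything kept alive by reference cycles, which large pandas structures can participate in. A standalone illustration of the pattern (names are illustrative, not pylantir's API):

```python
import gc
import pandas as pd

def build_report() -> list:
    records = pd.DataFrame({"record_id": [1, 1, 2], "value": [10, 20, 30]})
    groups = list(records.groupby("record_id"))
    summary = [(rid, len(g)) for rid, g in groups]

    # Drop the names so the DataFrame and group list lose their last
    # references, then sweep any leftover cycles before returning.
    del groups
    del records
    gc.collect()
    return summary
```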
@@ -164,11 +175,11 @@ def cleanup_memory_and_connections():
          lgr.debug("Disposing database connection pool")
          engine.pool.dispose()

-         # 3. Force Python garbage collection
-         # Run multiple times to catch circular references
-         collected = 0
-         for _ in range(3):
-             collected += gc.collect()
+         # 3. Force Python garbage collection targeting all generations
+         # Target generation 2 (oldest) first to catch long-lived objects
+         collected = gc.collect(generation=2)   # Oldest generation
+         collected += gc.collect(generation=1)  # Middle generation
+         collected += gc.collect(generation=0)  # Youngest generation

          # 4. Clear any cached SQLAlchemy metadata
          if hasattr(engine, 'pool'):
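Worth knowing when reading this hunk: in CPython, `gc.collect(generation=2)` is already a full collection that scans generations 0 and 1 as well, so the two follow-up passes mainly pick up objects that became unreachable as a side effect of the first pass. A quick way to observe the generation counters (illustrative):

```python
import gc

print(gc.get_count())             # pending allocations per generation, e.g. (321, 4, 1)
freed = gc.collect(generation=2)  # full collection: includes gens 0 and 1
print(freed, gc.get_count())      # counters drop back toward (0, 0, 0)
```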
@@ -178,20 +189,17 @@ def cleanup_memory_and_connections():
          # Get memory usage after cleanup
          memory_after = get_memory_usage()

-         # Log cleanup results
-         if memory_before and memory_after:
-             if 'rss_mb' in memory_before:
-                 memory_freed = memory_before['rss_mb'] - memory_after['rss_mb']
-                 lgr.info(f"Memory cleanup completed. "
-                          f"Before: {memory_before['rss_mb']}MB, "
-                          f"After: {memory_after['rss_mb']}MB, "
-                          f"Freed: {memory_freed:.2f}MB, "
-                          f"Collected {collected} objects")
-             else:
-                 lgr.info(f"Memory cleanup completed. Collected {collected} objects. "
-                          f"Memory stats: {memory_after}")
+         # Log cleanup results with simplified, focused metrics
+         if memory_before and memory_after and 'rss_mb' in memory_before:
+             freed = memory_before['rss_mb'] - memory_after['rss_mb']
+             lgr.info(
+                 f"Memory cleanup: Before={memory_before['rss_mb']:.1f}MB, "
+                 f"After={memory_after['rss_mb']:.1f}MB, "
+                 f"Freed={freed:.1f}MB, "
+                 f"Objects={collected}"
+             )
          else:
-             lgr.info(f"Memory cleanup completed. Collected {collected} objects")
+             lgr.info(f"Memory cleanup: Collected {collected} objects")

      except Exception as e:
          lgr.error(f"Error during cleanup: {e}")
@@ -328,6 +336,8 @@ def sync_redcap_to_db(
      finally:
          # Always ensure session is properly closed
          if session:
+             # Detach all ORM objects from session to clear identity map
+             session.expunge_all()
              session.close()

      # Perform cleanup after sync
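For context on the `expunge_all()` call: a SQLAlchemy `Session` tracks every ORM instance it has loaded or added, and instances in its new/dirty sets are held by strong references until flushed, so detaching everything before `close()` guarantees the objects are reclaimable even if the `Session` object itself is still referenced elsewhere. A minimal sketch of the pattern (engine URL and model are illustrative, not pylantir's actual schema):

```python
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

engine = create_engine("sqlite:///worklist.db")  # illustrative URL

def read_ids(Entry):  # Entry: any mapped ORM class
    session = Session(engine)
    try:
        return [e.id for e in session.scalars(select(Entry))]
    finally:
        session.expunge_all()  # detach instances; empty the identity map
        session.close()        # release the pooled connection
```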