pylantir 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylantir/api_server.py +13 -9
- pylantir/cli/run.py +307 -41
- pylantir/config/calpendo_config_example.json +65 -0
- pylantir/config/mwl_config.json +3 -1
- pylantir/data_sources/__init__.py +84 -0
- pylantir/data_sources/base.py +117 -0
- pylantir/data_sources/calpendo_plugin.py +702 -0
- pylantir/data_sources/redcap_plugin.py +367 -0
- pylantir/db_setup.py +3 -0
- pylantir/models.py +3 -0
- pylantir/populate_db.py +6 -3
- pylantir/redcap_to_db.py +128 -81
- {pylantir-0.2.3.dist-info → pylantir-0.3.1.dist-info}/METADATA +316 -33
- pylantir-0.3.1.dist-info/RECORD +25 -0
- pylantir-0.2.3.dist-info/RECORD +0 -20
- {pylantir-0.2.3.dist-info → pylantir-0.3.1.dist-info}/WHEEL +0 -0
- {pylantir-0.2.3.dist-info → pylantir-0.3.1.dist-info}/entry_points.txt +0 -0
- {pylantir-0.2.3.dist-info → pylantir-0.3.1.dist-info}/licenses/LICENSE +0 -0
pylantir/redcap_to_db.py
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LEGACY MODULE - Backward Compatibility Wrapper for REDCapPlugin
|
|
3
|
+
|
|
4
|
+
This module provides backward-compatible function signatures that internally
|
|
5
|
+
delegate to the new plugin-based architecture. Existing code can continue
|
|
6
|
+
calling these functions without modification.
|
|
7
|
+
|
|
8
|
+
MIGRATION PATH: New code should use src/pylantir/data_sources/redcap_plugin.py
|
|
9
|
+
directly instead of these legacy wrappers.
|
|
10
|
+
"""
|
|
1
11
|
import os
|
|
2
12
|
import logging
|
|
3
|
-
import pandas as pd
|
|
4
13
|
from redcap import Project
|
|
5
14
|
import uuid
|
|
6
15
|
from sqlalchemy.orm import sessionmaker
|
|
@@ -13,6 +22,13 @@ import gc
|
|
|
13
22
|
|
|
14
23
|
lgr = logging.getLogger(__name__)
|
|
15
24
|
|
|
25
|
+
# Import the new plugin system
|
|
26
|
+
from .data_sources.redcap_plugin import REDCapPlugin
|
|
27
|
+
from .data_sources.base import PluginError
|
|
28
|
+
|
|
29
|
+
# NOTE: pandas import removed - we use native Python dicts/lists to avoid
|
|
30
|
+
# DataFrame memory overhead (50-100x memory reduction per sync cycle)
|
|
31
|
+
|
|
16
32
|
# Optional memory monitoring (install with: pip install psutil)
|
|
17
33
|
try:
|
|
18
34
|
import psutil
|
|
@@ -33,80 +49,71 @@ Session = sessionmaker(bind=engine)
|
|
|
33
49
|
|
|
34
50
|
|
|
35
51
|
def fetch_redcap_entries(redcap_fields: list, interval: float) -> list:
|
|
36
|
-
"""
|
|
37
|
-
|
|
52
|
+
"""
|
|
53
|
+
LEGACY WRAPPER: Fetch REDCap entries using PyCap and return a list of filtered dicts.
|
|
38
54
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
55
|
+
**DEPRECATION NOTICE**: This function is deprecated and maintained only for
|
|
56
|
+
backward compatibility. New code should use REDCapPlugin.fetch_entries() directly
|
|
57
|
+
from src/pylantir/data_sources/redcap_plugin.py
|
|
42
58
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
redcap_fields = [field for field in redcap_fields if field in valid_fields]
|
|
59
|
+
This function now delegates to REDCapPlugin for consistency with the new
|
|
60
|
+
plugin architecture. Existing callers can continue using this signature.
|
|
46
61
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
62
|
+
Args:
|
|
63
|
+
redcap_fields: List of REDCap field names to fetch
|
|
64
|
+
interval: Time window in seconds to fetch records from
|
|
50
65
|
|
|
51
|
-
|
|
66
|
+
Returns:
|
|
67
|
+
List of filtered MRI record dictionaries
|
|
52
68
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
69
|
+
MIGRATION PATH: Use REDCapPlugin directly:
|
|
70
|
+
```python
|
|
71
|
+
from pylantir.data_sources.redcap_plugin import REDCapPlugin
|
|
72
|
+
plugin = REDCapPlugin(name, config, field_mapping)
|
|
73
|
+
entries = plugin.fetch_entries(since=datetime_interval)
|
|
74
|
+
```
|
|
75
|
+
"""
|
|
76
|
+
lgr.warning(
|
|
77
|
+
"fetch_redcap_entries() is deprecated. "
|
|
78
|
+
"Use REDCapPlugin from src/pylantir/data_sources/redcap_plugin.py instead."
|
|
79
|
+
)
|
|
57
80
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
81
|
+
try:
|
|
82
|
+
# Build plugin configuration from environment variables
|
|
83
|
+
config = {
|
|
84
|
+
"site_id": "default", # Legacy calls don't have site_id
|
|
85
|
+
"protocol": "DEFAULT_PROTOCOL"
|
|
86
|
+
}
|
|
61
87
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
# Explicitly clean up the empty DataFrame to release any allocated buffers
|
|
65
|
-
del records
|
|
66
|
-
gc.collect()
|
|
67
|
-
return []
|
|
88
|
+
# Create field mapping for plugin (maps REDCap field name to itself)
|
|
89
|
+
field_mapping = {field: field for field in redcap_fields}
|
|
68
90
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
mri_rows = group[
|
|
82
|
-
(group["redcap_repeat_instrument"] == "mri") &
|
|
83
|
-
(group.get("mri_instance").notna()) &
|
|
84
|
-
(group.get("mri_instance") != "" ) &
|
|
85
|
-
(group.get("mri_date").notna()) &
|
|
86
|
-
(group.get("mri_time").notna())
|
|
87
|
-
]
|
|
88
|
-
|
|
89
|
-
for _, mri_row in mri_rows.iterrows():
|
|
90
|
-
record = {"record_id": record_id}
|
|
91
|
-
|
|
92
|
-
# Merge fields from baseline and mri_row, only include requested fields
|
|
93
|
-
for field in redcap_fields:
|
|
94
|
-
record[field] = (
|
|
95
|
-
mri_row.get(field)
|
|
96
|
-
if pd.notna(mri_row.get(field))
|
|
97
|
-
else baseline_row.get(field)
|
|
98
|
-
)
|
|
91
|
+
# Instantiate plugin (no arguments)
|
|
92
|
+
plugin = REDCapPlugin()
|
|
93
|
+
|
|
94
|
+
# Validate configuration
|
|
95
|
+
is_valid, error_msg = plugin.validate_config(config)
|
|
96
|
+
if not is_valid:
|
|
97
|
+
lgr.error(f"Plugin configuration validation failed: {error_msg}")
|
|
98
|
+
return []
|
|
99
|
+
|
|
100
|
+
# Fetch entries using plugin
|
|
101
|
+
datetime_now = datetime.now()
|
|
102
|
+
datetime_interval = datetime_now - timedelta(seconds=interval)
|
|
99
103
|
|
|
100
|
-
|
|
104
|
+
entries = plugin.fetch_entries(field_mapping=field_mapping, interval=interval)
|
|
101
105
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
del records
|
|
105
|
-
gc.collect()
|
|
106
|
-
|
|
107
|
-
return filtered_records
|
|
106
|
+
# Cleanup plugin resources
|
|
107
|
+
plugin.cleanup()
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
return entries
|
|
110
|
+
|
|
111
|
+
except PluginError as e:
|
|
112
|
+
lgr.error(f"Plugin error in legacy fetch_redcap_entries: {e}")
|
|
113
|
+
return []
|
|
114
|
+
except Exception as e:
|
|
115
|
+
lgr.error(f"Unexpected error in legacy fetch_redcap_entries: {e}")
|
|
116
|
+
return []  # TODO: Implement age binning for participants
|
|
110
117
|
def age_binning():
|
|
111
118
|
return None
|
|
112
119
|
|
|
@@ -163,35 +170,35 @@ def cleanup_memory_and_connections():
|
|
|
163
170
|
This function should be called after each synchronization cycle.
|
|
164
171
|
"""
|
|
165
172
|
lgr.debug("Starting memory and connection cleanup...")
|
|
166
|
-
|
|
173
|
+
|
|
167
174
|
# Get memory usage before cleanup
|
|
168
175
|
memory_before = get_memory_usage()
|
|
169
|
-
|
|
176
|
+
|
|
170
177
|
try:
|
|
171
178
|
# 1. Clear pandas cache and temporary objects
|
|
172
179
|
# Force garbage collection of pandas objects
|
|
173
180
|
gc.collect()
|
|
174
|
-
|
|
181
|
+
|
|
175
182
|
# 2. Close any idle database connections in the pool
|
|
176
183
|
if hasattr(engine, 'pool'):
|
|
177
184
|
# Dispose of the connection pool to free up connections
|
|
178
185
|
lgr.debug("Disposing database connection pool")
|
|
179
186
|
engine.pool.dispose()
|
|
180
|
-
|
|
187
|
+
|
|
181
188
|
# 3. Force Python garbage collection targeting all generations
|
|
182
189
|
# Target generation 2 (oldest) first to catch long-lived objects
|
|
183
190
|
collected = gc.collect(generation=2) # Oldest generation
|
|
184
191
|
collected += gc.collect(generation=1) # Middle generation
|
|
185
192
|
collected += gc.collect(generation=0) # Youngest generation
|
|
186
|
-
|
|
193
|
+
|
|
187
194
|
# 4. Clear any cached SQLAlchemy metadata
|
|
188
195
|
if hasattr(engine, 'pool'):
|
|
189
196
|
# Recreate the pool with fresh connections
|
|
190
197
|
engine.pool.recreate()
|
|
191
|
-
|
|
198
|
+
|
|
192
199
|
# Get memory usage after cleanup
|
|
193
200
|
memory_after = get_memory_usage()
|
|
194
|
-
|
|
201
|
+
|
|
195
202
|
# Log cleanup results with simplified, focused metrics
|
|
196
203
|
if memory_before and memory_after and 'rss_mb' in memory_before:
|
|
197
204
|
freed = memory_before['rss_mb'] - memory_after['rss_mb']
|
|
@@ -203,7 +210,7 @@ def cleanup_memory_and_connections():
|
|
|
203
210
|
)
|
|
204
211
|
else:
|
|
205
212
|
lgr.info(f"Memory cleanup: Collected {collected} objects")
|
|
206
|
-
|
|
213
|
+
|
|
207
214
|
except Exception as e:
|
|
208
215
|
lgr.error(f"Error during cleanup: {e}")
|
|
209
216
|
# Don't let cleanup errors stop the main process
|
|
@@ -215,8 +222,29 @@ def sync_redcap_to_db(
|
|
|
215
222
|
protocol: dict,
|
|
216
223
|
redcap2wl: dict,
|
|
217
224
|
interval: float = 60.0,
|
|
225
|
+
source_name: str = None,
|
|
218
226
|
) -> None:
|
|
219
|
-
"""
|
|
227
|
+
"""
|
|
228
|
+
LEGACY WRAPPER: Sync REDCap patient data with the worklist database.
|
|
229
|
+
|
|
230
|
+
**DEPRECATION NOTICE**: This function is deprecated and maintained only for
|
|
231
|
+
backward compatibility. New code should use the plugin-based architecture
|
|
232
|
+
from src/pylantir/data_sources/
|
|
233
|
+
|
|
234
|
+
NOTE: This function now uses REDCapPlugin internally via fetch_redcap_entries()
|
|
235
|
+
wrapper, ensuring consistent behavior with the new plugin architecture.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
site_id: Site identifier
|
|
239
|
+
protocol: Protocol mapping dictionary
|
|
240
|
+
redcap2wl: Field mapping dictionary
|
|
241
|
+
interval: Sync interval in seconds
|
|
242
|
+
source_name: Optional data source name for tracking (new in v0.3.0)
|
|
243
|
+
"""
|
|
244
|
+
lgr.warning(
|
|
245
|
+
"sync_redcap_to_db() is deprecated. "
|
|
246
|
+
"Use data_sources configuration with REDCapPlugin instead."
|
|
247
|
+
)
|
|
220
248
|
|
|
221
249
|
if not redcap2wl:
|
|
222
250
|
lgr.error("No field mapping (redcap2wl) provided for syncing.")
|
|
@@ -241,6 +269,7 @@ def sync_redcap_to_db(
|
|
|
241
269
|
if i not in redcap_fields:
|
|
242
270
|
redcap_fields.append(i)
|
|
243
271
|
|
|
272
|
+
# NOTE: fetch_redcap_entries() now delegates to REDCapPlugin internally
|
|
244
273
|
redcap_entries = fetch_redcap_entries(redcap_fields, interval)
|
|
245
274
|
|
|
246
275
|
for record in redcap_entries:
|
|
@@ -290,6 +319,9 @@ def sync_redcap_to_db(
|
|
|
290
319
|
existing_entry.modality = record.get("modality", "MR")
|
|
291
320
|
existing_entry.scheduled_start_date = record.get("mri_date")
|
|
292
321
|
existing_entry.scheduled_start_time = record.get("mri_time")
|
|
322
|
+
# Track data source if provided
|
|
323
|
+
if source_name:
|
|
324
|
+
existing_entry.data_source = source_name
|
|
293
325
|
# Dynamically update DICOM worklist fields from REDCap
|
|
294
326
|
for redcap_field, dicom_field in redcap2wl.items():
|
|
295
327
|
if redcap_field in record:
|
|
@@ -324,13 +356,14 @@ def sync_redcap_to_db(
|
|
|
324
356
|
# performing_physician=record.get("performing_physician"),
|
|
325
357
|
study_description=record.get("study_description", "CPIP"),
|
|
326
358
|
# station_name=record.get("station_name"),
|
|
327
|
-
performed_procedure_step_status="SCHEDULED"
|
|
359
|
+
performed_procedure_step_status="SCHEDULED",
|
|
360
|
+
data_source=source_name # Track which source created this entry
|
|
328
361
|
)
|
|
329
362
|
session.add(new_entry)
|
|
330
363
|
|
|
331
364
|
session.commit()
|
|
332
365
|
logging.info("REDCap data synchronized successfully with DICOM worklist database.")
|
|
333
|
-
|
|
366
|
+
|
|
334
367
|
except Exception as e:
|
|
335
368
|
lgr.error(f"Error during REDCap synchronization: {e}")
|
|
336
369
|
if session:
|
|
@@ -342,10 +375,10 @@ def sync_redcap_to_db(
|
|
|
342
375
|
# Detach all ORM objects from session to clear identity map
|
|
343
376
|
session.expunge_all()
|
|
344
377
|
session.close()
|
|
345
|
-
|
|
378
|
+
|
|
346
379
|
# Perform cleanup after sync
|
|
347
380
|
cleanup_memory_and_connections()
|
|
348
|
-
|
|
381
|
+
|
|
349
382
|
# Log memory usage after cleanup
|
|
350
383
|
memory_after = get_memory_usage()
|
|
351
384
|
if memory_after:
|
|
@@ -358,12 +391,22 @@ def sync_redcap_to_db_repeatedly(
|
|
|
358
391
|
redcap2wl=None,
|
|
359
392
|
interval=60,
|
|
360
393
|
operation_interval={"start_time": [00,00], "end_time": [23,59]},
|
|
394
|
+
source_name=None,
|
|
361
395
|
):
|
|
362
396
|
"""
|
|
363
|
-
Keep syncing with REDCap in a loop every `interval` seconds
|
|
364
|
-
|
|
365
|
-
|
|
397
|
+
LEGACY WRAPPER: Keep syncing with REDCap in a loop every `interval` seconds.
|
|
398
|
+
|
|
399
|
+
**DEPRECATION NOTICE**: This function is deprecated and maintained only for
|
|
400
|
+
backward compatibility. New code should use the plugin-based multi-source
|
|
401
|
+
orchestration from src/pylantir/cli/run.py
|
|
402
|
+
|
|
403
|
+
MIGRATION PATH: Configure data_sources array in mwl_config.json and use
|
|
404
|
+
the new orchestration system.
|
|
366
405
|
"""
|
|
406
|
+
lgr.warning(
|
|
407
|
+
"sync_redcap_to_db_repeatedly() is deprecated. "
|
|
408
|
+
"Use data_sources configuration with multi-source orchestration instead."
|
|
409
|
+
)
|
|
367
410
|
if operation_interval is None:
|
|
368
411
|
operation_interval = {"start_time": [0, 0], "end_time": [23, 59]}
|
|
369
412
|
|
|
@@ -399,6 +442,8 @@ def sync_redcap_to_db_repeatedly(
|
|
|
399
442
|
dt_end_yesterday = datetime.combine(yesterday, end_time)
|
|
400
443
|
dt_start_today = datetime.combine(today_date, start_time)
|
|
401
444
|
delta = dt_start_today - dt_end_yesterday
|
|
445
|
+
# temporary large interval to catch up on missed data
|
|
446
|
+
# delta = delta + timedelta(seconds=6000000)
|
|
402
447
|
# guaranteed to be positive if yesterday < today
|
|
403
448
|
extended_interval = delta.total_seconds()
|
|
404
449
|
logging.info(f"Using extended interval: {extended_interval}, {interval} seconds until next sync.")
|
|
@@ -417,6 +462,7 @@ def sync_redcap_to_db_repeatedly(
|
|
|
417
462
|
protocol=protocol,
|
|
418
463
|
redcap2wl=redcap2wl,
|
|
419
464
|
interval=extended_interval,
|
|
465
|
+
source_name=source_name,
|
|
420
466
|
)
|
|
421
467
|
else:
|
|
422
468
|
sync_redcap_to_db(
|
|
@@ -424,6 +470,7 @@ def sync_redcap_to_db_repeatedly(
|
|
|
424
470
|
protocol=protocol,
|
|
425
471
|
redcap2wl=redcap2wl,
|
|
426
472
|
interval=interval_sync,
|
|
473
|
+
source_name=source_name,
|
|
427
474
|
)
|
|
428
475
|
last_sync_date = today_date
|
|
429
476
|
logging.debug(f"REDCap sync completed at {now_time}. Next sync atempt in {interval} seconds.")
|
|
@@ -440,7 +487,7 @@ def sync_redcap_to_db_repeatedly(
|
|
|
440
487
|
f"Current time {now_time} is outside operation window "
|
|
441
488
|
f"({start_time}–{end_time}). Sleeping for {interval} seconds."
|
|
442
489
|
)
|
|
443
|
-
|
|
490
|
+
|
|
444
491
|
# Run periodic cleanup even during off-hours to prevent memory buildup
|
|
445
492
|
# Only run every 10th cycle to avoid excessive overhead
|
|
446
493
|
if (now_dt.hour == 3 and now_dt.minute == 0): # Daily cleanup at 3 AM
|