medicafe 0.250722.0__py3-none-any.whl → 0.250723.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of medicafe might be problematic. Click here for more details.

@@ -1,780 +1,796 @@
1
- #MediBot_Preprocessor_lib.py
2
- from collections import OrderedDict, defaultdict
3
- from datetime import datetime, timedelta
4
- import os, csv, sys
5
- import chardet # Ensure chardet is imported
6
-
7
- # Add the parent directory of the project to the Python path
8
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
-
10
- # Configuration cache to avoid repeated loading
11
- _config_cache = None
12
- _crosswalk_cache = None
13
-
14
- # Attempt to import necessary modules, falling back if they are not found
15
- try:
16
- import MediLink_ConfigLoader
17
- import MediLink_DataMgmt
18
- except ImportError:
19
- from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
20
-
21
- try:
22
- from MediBot_UI import app_control
23
- from MediBot_docx_decoder import parse_docx
24
- except ImportError:
25
- from MediBot import MediBot_UI
26
- app_control = MediBot_UI.app_control
27
- from MediBot import MediBot_docx_decoder
28
- parse_docx = MediBot_docx_decoder.parse_docx
29
-
30
class InitializationError(Exception):
    """Raised when MediBot's module-level settings cannot be initialized.

    The text is stored on the ``message`` attribute and is also handed to
    ``Exception`` so ``str(error)`` returns the same text.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
34
-
35
def initialize(config):
    """
    Publishes selected configuration values as module-level globals.

    Parameters:
    config (dict): Mapping that is queried with ``.get`` for each setting.

    Returns:
    None: AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping and page_end_markers
    are written into this module's globals. Settings missing from the config
    fall back to an empty default; field_mapping is always stored as an
    OrderedDict.

    Raises:
    InitializationError: If an AttributeError occurs while reading a setting
    (for example when ``config`` has no ``get`` method).
    """
    global AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping, page_end_markers

    fallbacks = (
        ('AHK_EXECUTABLE', ""),
        ('CSV_FILE_PATH', ""),
        ('field_mapping', {}),
        ('page_end_markers', []),
    )

    module_namespace = globals()
    for setting, fallback in fallbacks:
        try:
            value = config.get(setting, fallback)
            if setting == 'field_mapping':
                value = OrderedDict(value)
        except AttributeError:
            raise InitializationError("Error: '{}' not found in config.".format(setting))
        module_namespace[setting] = value
50
-
51
def get_cached_configuration():
    """
    Returns the (config, crosswalk) pair, loading it only when the module-level
    cache is incomplete.

    Returns:
    tuple: ``(_config_cache, _crosswalk_cache)`` as produced by
    MediLink_ConfigLoader.load_configuration().
    """
    global _config_cache, _crosswalk_cache

    cache_is_complete = _config_cache is not None and _crosswalk_cache is not None
    if not cache_is_complete:
        # Either half missing means both are reloaded together.
        _config_cache, _crosswalk_cache = MediLink_ConfigLoader.load_configuration()

    return _config_cache, _crosswalk_cache
61
-
62
def open_csv_for_editing(csv_file_path):
    """
    Opens the CSV file with the application Windows associates with it.

    The path is handed to os.startfile instead of being interpolated into a
    shell command line, so quotes or shell metacharacters in the path cannot
    change the command that is run, and a failure to launch is reported as an
    exception rather than an ignored exit code.

    Parameters:
    csv_file_path (str): Path of the CSV file to open.

    Returns:
    None: Prints either the follow-up instruction or the failure reason.
    """
    try:
        # os.startfile only exists on Windows; AttributeError covers other platforms.
        os.startfile(csv_file_path)
    except (AttributeError, OSError) as e:
        print("Failed to open CSV file:", e)
    else:
        print("After saving the revised CSV, please re-run MediBot.")
69
-
70
- # Function to clean the headers
71
def clean_header(headers):
    """
    Normalizes CSV header strings.

    Each header is stripped of surrounding whitespace and reduced to its
    alphanumeric characters, whitespace, hyphens and underscores.

    Parameters:
    headers (list of str): The original header strings.

    Returns:
    list of str: The cleaned header strings, in the original order.

    Raises:
    ValueError: If no cleaned header equals 'Surgery Date'.
    """
    allowed_punctuation = ('-', '_')

    def _scrub(raw_header):
        trimmed = raw_header.strip()
        return ''.join(
            ch for ch in trimmed
            if ch.isalnum() or ch.isspace() or ch in allowed_punctuation
        )

    cleaned_headers = [_scrub(header) for header in headers]

    # Record both versions so header problems can be diagnosed from the log.
    MediLink_ConfigLoader.log("Original headers: {}".format(headers), level="INFO")
    MediLink_ConfigLoader.log("Cleaned headers: {}".format(cleaned_headers), level="INFO")

    if 'Surgery Date' in cleaned_headers:
        return cleaned_headers

    # The required column is missing: report it on every channel, then fail.
    MediLink_ConfigLoader.log("WARNING: 'Surgery Date' header not found after cleaning.", level="WARNING")
    print("WARNING: 'Surgery Date' header not found after cleaning.")
    raise ValueError("Error: 'Surgery Date' header not found after cleaning.")
101
-
102
- # Function to load and process CSV data
103
def load_csv_data(csv_file_path):
    """
    Loads a CSV file into a list of dictionaries keyed by cleaned header names.

    The encoding is detected with chardet from the raw bytes, the headers are
    cleaned with clean_header, and every data row is re-keyed from its original
    header to the cleaned one.

    Parameters:
    csv_file_path (str): Path of the CSV file to read.

    Returns:
    list of dict: One dictionary per data row.

    Raises:
    ValueError: Propagated from clean_header when 'Surgery Date' is missing
    (this now also covers a file with no header row at all).
    SystemExit: Exit code 1 when the file is missing or cannot be read.
    """
    try:
        # Check if the file exists
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError("***Error: CSV file '{}' not found.".format(csv_file_path))

        # Detect the file encoding
        with open(csv_file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']
        confidence = result['confidence']
        print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))

        # newline='' is what the csv module expects: without it, line breaks
        # embedded in quoted fields are translated before csv can see them.
        with open(csv_file_path, 'r', encoding=encoding, newline='') as csvfile:
            reader = csv.DictReader(csvfile)

            # fieldnames is None for an empty file; hand clean_header an empty
            # list so it reports the missing column instead of a TypeError.
            original_headers = reader.fieldnames or []
            cleaned_headers = clean_header(original_headers)

            # Pair each cleaned header with the original key used by DictReader.
            header_pairs = list(zip(cleaned_headers, original_headers))

            return [
                {cleaned: row[original] for cleaned, original in header_pairs}
                for row in reader
            ]
    except FileNotFoundError as e:
        print(e)  # Print the informative error message
        print("Hint: Check if CSV file is located in the expected directory or specify a different path in config file.")
        print("Please correct the issue and re-run MediBot.")
        sys.exit(1)  # Halt the script
    except IOError as e:
        print("Error reading CSV file: {}. Please check the file path and permissions.".format(e))
        sys.exit(1)  # Halt the script in case of other IO errors
146
-
147
- # CSV Pre-processor Helper functions
148
def add_columns(csv_data, column_headers):
    """
    Adds one or more empty columns to every row of the CSV data.

    Parameters:
    csv_data (list of dict): The CSV data where each row is a dictionary.
    column_headers (list of str or str): Header(s) to add; a single string is
    treated as a one-element list.

    Returns:
    None: csv_data is modified in place; each named column is set to ''.

    Raises:
    ValueError: If column_headers is neither a string nor a list.
    """
    if isinstance(column_headers, str):
        new_headers = [column_headers]
    elif isinstance(column_headers, list):
        new_headers = column_headers
    else:
        raise ValueError("column_headers should be a list or a string")

    for row in csv_data:
        # Existing values under the same header are reset to an empty string.
        row.update((header, '') for header in new_headers)
167
-
168
- # Extracting the list to a variable for future refactoring:
169
def filter_rows(csv_data):
    """
    Removes, in place, rows without a Patient ID and rows whose Primary
    Insurance is one of the excluded carriers.
    """
    # TODO: This should be handled in the crosswalk.
    excluded_insurance = frozenset(('AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'))

    def _keep(row):
        if not row.get('Patient ID'):
            return False
        return row.get('Primary Insurance') not in excluded_insurance

    # Slice assignment keeps the caller's list object and swaps its contents.
    csv_data[:] = list(filter(_keep, csv_data))
173
-
174
def convert_surgery_date(csv_data):
    """
    Converts each row's 'Surgery Date' from an 'MM/DD/YYYY' string to a datetime.

    Surrounding whitespace is stripped before parsing. Values that are already
    datetime objects are left untouched, so calling this function again on the
    same rows is safe. Missing, blank or unparseable values are replaced with
    datetime.min, which the rest of the module treats as "no valid date".

    Parameters:
    csv_data (list of dict): The CSV rows; modified in place.

    Returns:
    None
    """
    for row in csv_data:
        raw_value = row.get('Surgery Date', '')

        # Already converted by an earlier call; strptime would raise TypeError.
        if isinstance(raw_value, datetime):
            continue

        surgery_date_str = raw_value.strip() if isinstance(raw_value, str) else raw_value

        if not surgery_date_str:
            MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if empty
            print("Surgery Date not found for row: {}".format(row))
            continue

        try:
            row['Surgery Date'] = datetime.strptime(surgery_date_str, '%m/%d/%Y')
        except (ValueError, TypeError) as e:
            # TypeError covers non-string cell values; both mean "unusable date".
            MediLink_ConfigLoader.log("Error parsing Surgery Date '{}': {} for row: {}".format(surgery_date_str, e, row), level="ERROR")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if parsing fails
188
-
189
def sort_and_deduplicate(csv_data):
    """
    Keeps one row per Patient ID (the one with the earliest Surgery Date) and
    sorts the survivors by Surgery Date, then by stripped Patient Last.

    Parameters:
    csv_data (list of dict): Rows whose 'Surgery Date' values are comparable
    (datetime objects after convert_surgery_date); modified in place.

    Returns:
    None
    """
    earliest_by_patient = {}

    for record in csv_data:
        pid = record.get('Patient ID')
        current = earliest_by_patient.get(pid)
        # First sighting, or a strictly earlier surgery than the one on file.
        if current is None or record['Surgery Date'] < current['Surgery Date']:
            earliest_by_patient[pid] = record

    def _sort_key(record):
        return (record['Surgery Date'], record.get('Patient Last', '').strip())

    survivors = list(earliest_by_patient.values())
    survivors.sort(key=_sort_key)  # TODO Does this need to be sorted twice? once before and once after?
    csv_data[:] = survivors
206
-
207
- # TODO: Consider adding an option in the config to sort based on Surgery Schedules when available.
208
- # If no schedule is available, the current sorting strategy will be used.
209
-
210
def combine_fields(csv_data):
    """
    Builds the derived 'Patient Name' and 'Patient Street' columns and turns
    'Surgery Date' back into an 'MM/DD/YYYY' string.

    'Patient Name' is "Last, First M": multi-word first/last names are joined
    with underscores and the middle name contributes its first character only
    (a one-letter middle initial is kept). 'Patient Street' joins the
    non-empty address lines with a space. Missing or None cell values are
    treated as empty strings.

    Parameters:
    csv_data (list of dict): The CSV rows; modified in place.

    Returns:
    None
    """
    def _text(row, key):
        # DictReader fills short rows with None, so .get's default is not enough.
        value = row.get(key)
        return '' if value is None else str(value).strip()

    for row in csv_data:
        # Safely handle the 'Surgery Date' conversion
        surgery_date = row.get('Surgery Date')
        row['Surgery Date'] = surgery_date.strftime('%m/%d/%Y') if surgery_date else ''

        first_name = '_'.join(_text(row, 'Patient First').split())
        last_name = '_'.join(_text(row, 'Patient Last').split())
        # Slicing yields '' for an empty value and the initial otherwise,
        # including when the field already holds a single letter.
        middle_initial = _text(row, 'Patient Middle')[:1]

        patient_name = ', '.join(filter(None, [last_name, first_name]))
        if middle_initial:
            patient_name += ' ' + middle_initial
        row['Patient Name'] = patient_name

        address1 = _text(row, 'Patient Address1')
        address2 = _text(row, 'Patient Address2')
        row['Patient Street'] = ' '.join(filter(None, [address1, address2]))
225
-
226
def apply_replacements(csv_data, crosswalk):
    """
    Applies the crosswalk's 'csv_replacements' mapping to selected columns.

    For every row, the values of 'Patient SSN', 'Primary Insurance' and
    'Ins1 Payer ID' are each looked up in the mapping and replaced when a match
    exists. Every matching field of a row is replaced (not just the first one
    found), and each field is replaced at most once, so a replacement value is
    never itself replaced again.

    Parameters:
    csv_data (list of dict): The CSV rows; modified in place.
    crosswalk (dict): Crosswalk data; only the 'csv_replacements' mapping of
    old value -> new value is read.

    Returns:
    None
    """
    replacements = crosswalk.get('csv_replacements', {})
    if not replacements:
        return

    keys_to_check = ('Patient SSN', 'Primary Insurance', 'Ins1 Payer ID')

    for row in csv_data:
        for key in keys_to_check:
            if key not in row:
                continue
            try:
                # One dictionary lookup per field instead of scanning every replacement.
                row[key] = replacements[row[key]]
            except (KeyError, TypeError):
                # No replacement configured for this value (or it is unhashable).
                continue
242
-
243
- import difflib
244
- from collections import defaultdict
245
-
246
def find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names):
    """
    Picks the Medisoft ID whose MAINS name is most similar to an insurance name.

    Names are compared with digits removed and upper-cased, using
    difflib.SequenceMatcher ratios. No minimum ratio is enforced.

    Parameters:
    - insurance_name (str): The insurance name from the CSV row.
    - medisoft_ids (list): Candidate Medisoft IDs for the Payer ID.
    - medisoft_to_mains_names (dict): Mapping from Medisoft ID to a list of MAINS names.

    Returns:
    - The candidate (exactly as it appears in medisoft_ids) with the highest
      ratio, or None when no comparison scored above zero.
    """
    def _normalize(name):
        return ''.join(ch for ch in name if not ch.isdigit()).upper()

    target = _normalize(insurance_name)
    top_ratio = 0
    top_id = None

    # Flatten (ID, MAINS name) pairs lazily, in the same order as nested loops.
    candidates = (
        (candidate_id, mains_name)
        for candidate_id in medisoft_ids
        for mains_name in medisoft_to_mains_names.get(candidate_id, [])
    )

    for candidate_id, mains_name in candidates:
        normalized_mains = _normalize(mains_name)

        MediLink_ConfigLoader.log("Processing Medisoft ID '{}': Comparing processed insurance '{}' with processed mains '{}'.".format(candidate_id, target, normalized_mains), level="DEBUG")

        ratio = difflib.SequenceMatcher(None, target, normalized_mains).ratio()

        MediLink_ConfigLoader.log("Match ratio for Medisoft ID '{}': {:.2f}".format(candidate_id, ratio), level="DEBUG")

        if ratio <= top_ratio:
            continue

        # Strictly better than anything seen so far: remember it.
        top_ratio = ratio
        top_id = candidate_id
        MediLink_ConfigLoader.log("New best match found: Medisoft ID '{}' with match ratio {:.2f}".format(top_id, top_ratio), level="DEBUG")

    MediLink_ConfigLoader.log("Final best match ratio: {:.2f} for Medisoft ID '{}'".format(top_ratio, top_id), level="DEBUG")

    # No threshold applied, return the best match found
    return top_id
291
-
292
def NEW_update_insurance_ids(csv_data, config, crosswalk):
    """
    Updates the 'Ins1 Insurance ID' field in each row of csv_data based on the crosswalk and MAINS data.

    For a Payer ID with exactly one Medisoft ID that ID is assigned (as int).
    With several candidates the row's 'Primary Insurance' is fuzzy-matched
    against MAINS names; if nothing matches, the first candidate is used.
    Payer IDs without any Medisoft ID get a placeholder crosswalk entry; an
    entry that already exists keeps its values and only has missing
    placeholder keys filled in.

    Parameters:
    - csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    - config (dict): Configuration object passed on to load_insurance_data_from_mains.
    - crosswalk (dict): Crosswalk data containing mappings between Payer IDs and Medisoft IDs.

    Returns:
    - None: The function modifies csv_data (and possibly crosswalk) in place.
    """
    processed_payer_ids = set()  # Track processed Payer IDs
    MediLink_ConfigLoader.log("Starting update of insurance IDs.", level="INFO")

    # Load MAINS data to get mapping from insurance names to IDs
    insurance_to_id = load_insurance_data_from_mains(config)
    MediLink_ConfigLoader.log("Loaded MAINS data for insurance to ID mapping.", level="DEBUG")

    # Invert the mapping to get Medisoft ID to MAINS names
    medisoft_to_mains_names = defaultdict(list)
    for insurance_name, medisoft_id in insurance_to_id.items():
        medisoft_to_mains_names[medisoft_id].append(insurance_name)

    # enumerate supplies the row number directly; csv_data.index(row) was a
    # linear search per row and reported the first of any identical rows.
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = (row.get('Ins1 Payer ID') or '').strip()
        MediLink_ConfigLoader.log("Processing row with Ins1 Payer ID: '{}'.".format(ins1_payer_id), level="DEBUG")

        if not ins1_payer_id:
            continue

        # Mark this Payer ID as processed
        if ins1_payer_id not in processed_payer_ids:
            processed_payer_ids.add(ins1_payer_id)
            MediLink_ConfigLoader.log("Marked Payer ID '{}' as processed.".format(ins1_payer_id), level="DEBUG")

        # Retrieve Medisoft IDs for the current Payer ID
        medisoft_ids = crosswalk.get('payer_id', {}).get(ins1_payer_id, {}).get('medisoft_id', [])
        MediLink_ConfigLoader.log("Retrieved Medisoft IDs for Payer ID '{}': {}".format(ins1_payer_id, medisoft_ids), level="DEBUG")

        if not medisoft_ids:
            MediLink_ConfigLoader.log("No Medisoft IDs available for Payer ID '{}', creating placeholder entry.".format(ins1_payer_id), level="WARNING")
            # Add the placeholder without overwriting an existing entry, so an
            # already configured endpoint or Medicare ID list is preserved.
            entry = crosswalk.setdefault('payer_id', {}).setdefault(ins1_payer_id, {})
            entry.setdefault('medisoft_id', [])           # Placeholder for future Medisoft IDs
            entry.setdefault('medisoft_medicare_id', [])  # Placeholder for future Medicare IDs
            entry.setdefault('endpoint', None)            # Placeholder for future endpoint
            continue  # Skip further processing for this Payer ID

        # If only one Medisoft ID is associated, assign it directly
        if len(medisoft_ids) == 1:
            try:
                medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = medisoft_id
                MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, row_number, ins1_payer_id), level="DEBUG")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
            continue  # Move to the next row

        # If multiple Medisoft IDs are associated, perform fuzzy matching
        insurance_name = (row.get('Primary Insurance') or '').strip()
        if not insurance_name:
            MediLink_ConfigLoader.log("Row with Payer ID '{}' missing 'Primary Insurance', skipping assignment.".format(ins1_payer_id), level="WARNING")
            continue  # Skip if insurance name is missing

        best_medisoft_id = find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names)

        if best_medisoft_id:
            row['Ins1 Insurance ID'] = best_medisoft_id
            MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Payer ID '{}' based on fuzzy match.".format(best_medisoft_id, ins1_payer_id), level="INFO")
        else:
            # Default to the first Medisoft ID if no good match is found
            try:
                default_medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = default_medisoft_id
                MediLink_ConfigLoader.log("No suitable match found. Defaulted to Medisoft ID '{}' for Payer ID '{}'.".format(default_medisoft_id, ins1_payer_id), level="INFO")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting default Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
373
-
374
def update_insurance_ids(csv_data, config, crosswalk):
    """
    Sets 'Ins1 Insurance ID' on every row from the first Medisoft ID that the
    crosswalk lists for the row's 'Ins1 Payer ID'.

    Payer IDs that are absent from the crosswalk get a placeholder crosswalk
    entry and the row receives None. A Medisoft ID that cannot be converted to
    an integer is logged and treated as None instead of aborting the update.

    Parameters:
    - csv_data (list of dict): The CSV rows; modified in place.
    - config (dict): Not used by this function; kept for a signature parallel
      to NEW_update_insurance_ids.
    - crosswalk (dict): Crosswalk data; read under 'payer_id' and extended with
      placeholder entries for unknown Payer IDs.

    Returns:
    - None
    """
    MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")

    # Create a dictionary to hold Medisoft IDs for each payer ID in the crosswalk
    payer_id_to_medisoft = {}
    MediLink_ConfigLoader.log("Initialized payer_id_to_medisoft dictionary.", level="DEBUG")

    # Populate the dictionary with data from the crosswalk
    for payer_id, details in crosswalk.get('payer_id', {}).items():
        # Filter out empty strings and take the first valid ID
        medisoft_ids = [candidate for candidate in details.get('medisoft_id', []) if candidate]
        first_medisoft_id = None
        if medisoft_ids:
            try:
                first_medisoft_id = int(medisoft_ids[0])
            except (TypeError, ValueError) as e:
                # One malformed crosswalk entry must not abort every row.
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], payer_id, e), level="ERROR")
        payer_id_to_medisoft[payer_id] = first_medisoft_id
        MediLink_ConfigLoader.log("Processed Payer ID '{}': Medisoft IDs found: {}".format(payer_id, medisoft_ids), level="DEBUG")

    # Process the csv_data; enumerate gives the true row number in O(1).
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = (row.get('Ins1 Payer ID') or '').strip()
        MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_number, ins1_payer_id), level="DEBUG")

        if ins1_payer_id not in payer_id_to_medisoft:
            # Add placeholder entry for new payer ID
            payer_id_to_medisoft[ins1_payer_id] = None  # No Medisoft ID available
            crosswalk.setdefault('payer_id', {})[ins1_payer_id] = {
                'medisoft_id': [],           # Placeholder for future Medisoft IDs
                'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                'endpoint': None             # Placeholder for future endpoint
            }
            MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")

        # Assign the Medisoft ID to the row
        row['Ins1 Insurance ID'] = payer_id_to_medisoft[ins1_payer_id]
        MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Ins1 Payer ID '{}'.".format(row['Ins1 Insurance ID'], ins1_payer_id), level="DEBUG")
407
-
408
def update_procedure_codes(csv_data, crosswalk):
    """
    Fills the 'Procedure Code' column from each row's 'Default Diagnosis #1'.

    The Medisoft shorthand in 'Default Diagnosis #1' is translated back to a
    diagnosis code via the crosswalk's 'diagnosis_to_medisoft' mapping, and the
    diagnosis code is then looked up in the inverted 'procedure_to_diagnosis'
    mapping. Rows without a match receive "Unknown"; rows that raise are logged
    and left without a new value.

    Parameters:
    csv_data (list of dict): The CSV rows; modified in place.
    crosswalk (dict): Crosswalk data providing both mappings.

    Returns:
    bool: Always True.
    """
    # Get Medisoft shorthand dictionary from crosswalk and reverse it
    diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})  # BUG We need to be careful here in case we decide we need to change the crosswalk data specifically with regard to the T8/H usage.
    medisoft_to_diagnosis = dict(zip(diagnosis_to_medisoft.values(), diagnosis_to_medisoft.keys()))

    # Invert procedure -> [diagnoses] into diagnosis -> procedure for direct lookup
    diagnosis_to_procedure = {}
    for procedure, diagnoses in crosswalk.get('procedure_to_diagnosis', {}).items():
        for diagnosis in diagnoses:
            diagnosis_to_procedure[diagnosis] = procedure

    updated_count = 0

    for row_num, row in enumerate(csv_data, start=1):
        try:
            shorthand = row.get('Default Diagnosis #1', '').strip()
            diagnosis_code = medisoft_to_diagnosis.get(shorthand)
            procedure_code = diagnosis_to_procedure.get(diagnosis_code) if diagnosis_code else None

            if not procedure_code:
                row['Procedure Code'] = "Unknown"  # Or handle as appropriate
                continue

            row['Procedure Code'] = procedure_code
            updated_count += 1
        except Exception as e:
            MediLink_ConfigLoader.log("In update_procedure_codes, Error processing row {}: {}".format(row_num, e), level="ERROR")

    # Log total count of updated rows
    MediLink_ConfigLoader.log("Total {} 'Procedure Code' rows updated.".format(updated_count), level="INFO")

    return True
445
-
446
def _default_medisoft_shorthand(diagnosis_code):
    """Derive a fallback Medisoft shorthand for a diagnosis code missing from the crosswalk.

    Leading 'H' characters are dropped, then a literal 'T8' prefix (if present),
    dots are removed and the last five characters are kept. The 'T8' removal is
    a true prefix test: str.lstrip('T8') would strip any run of 'T'/'8'
    characters and mangle codes such as 'H81.01' or 'T88.59XA'.
    """
    code = diagnosis_code.lstrip('H')
    if code.startswith('T8'):
        code = code[len('T8'):]
    return code.replace('.', '')[-5:]


def update_diagnosis_codes(csv_data):
    """
    Updates 'Default Diagnosis #1' on CSV rows from diagnosis data parsed out of
    DOCX files in the configured local storage directory.

    Surgery dates are converted first; DOCX files whose modification time lies
    within 8 days of the earliest/latest valid surgery date are parsed, and
    rows are matched by Patient ID and surgery date ('MM-DD-YYYY'). Diagnosis
    codes are translated through the crosswalk's 'diagnosis_to_medisoft'
    mapping, with a derived shorthand as fallback.

    Parameters:
    csv_data (list of dict): The CSV rows; modified in place.

    Returns:
    None: Any exception is logged and printed rather than raised.
    """
    try:
        # Use cached configuration instead of loading repeatedly
        config, crosswalk = get_cached_configuration()

        # Extract the local storage path from the configuration
        local_storage_path = config['MediLink_Config']['local_storage_path']

        # Initialize a dictionary to hold diagnosis codes from all DOCX files
        all_patient_data = {}

        # Convert surgery dates in CSV data
        convert_surgery_date(csv_data)

        # Extract all valid surgery dates from csv_data
        surgery_dates = [row['Surgery Date'] for row in csv_data if row['Surgery Date'] != datetime.min]

        if not surgery_dates:
            raise ValueError("No valid surgery dates found in csv_data.")

        # Determine the minimum and maximum surgery dates
        min_surgery_date = min(surgery_dates)
        max_surgery_date = max(surgery_dates)

        # Apply a +/-8-day margin to the surgery dates... Increased from 5 days.
        margin = timedelta(days=8)
        threshold_start = min_surgery_date - margin
        threshold_end = max_surgery_date + margin

        # TODO (Low) This is a bad idea. We need a better way to handle this because it leaves
        # us with a situation where if we take 'too long' to download the DOCX files, it will presume that the DOCX files are out of range because
        # the modified date is a bad proxy for the date of the surgery which would be contained inside the DOCX file. The processing overhead for extracting the
        # date of the surgery from the DOCX file is non-trivial and computationally expensive so we need a smarter way to handle this.

        MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")

        # Gather all relevant DOCX files in the specified directory
        valid_files = []
        try:
            for filename in os.listdir(local_storage_path):
                if filename.endswith(".docx"):
                    filepath = os.path.join(local_storage_path, filename)
                    # Check modification time only once per file
                    try:
                        mtime = os.path.getmtime(filepath)
                        if threshold_start <= datetime.fromtimestamp(mtime) <= threshold_end:
                            valid_files.append(filepath)
                    except (OSError, ValueError):
                        # Skip files with invalid modification times
                        continue
        except OSError:
            MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
            return

        # Process valid DOCX files
        for filepath in valid_files:
            MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
            try:
                patient_data = parse_docx(filepath, surgery_dates)  # Pass surgery_dates to parse_docx
                for patient_id, service_dates in patient_data.items():
                    # Later files overwrite earlier entries for the same patient/date.
                    all_patient_data.setdefault(patient_id, {}).update(service_dates)
            except Exception as e:
                MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")

        # Log if no valid files were found
        if not valid_files:
            MediLink_ConfigLoader.log("No valid DOCX files found within the modification time threshold.", level="INFO")

        # Debug logging for all_patient_data
        MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")

        # Extract patient IDs from csv_data for efficient matching
        patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}

        # Check if any patient data was collected
        if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
            MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
            return  # Exit the function early if no data is available

        # Get Medisoft shorthand dictionary from crosswalk.
        diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})

        # Initialize counter for updated rows
        updated_count = 0

        # Update the "Default Diagnosis #1" column in the CSV data.
        # (Every row's ID is in patient_ids_in_csv by construction, so no
        # per-row membership test against that set is needed.)
        for row_num, row in enumerate(csv_data, start=1):
            patient_id = row.get('Patient ID', '').strip()

            MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
            surgery_date = row.get('Surgery Date', '')

            # Convert surgery_date to string format for lookup
            if surgery_date != datetime.min:
                surgery_date_str = surgery_date.strftime("%m-%d-%Y")
            else:
                surgery_date_str = ''

            MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

            if patient_id in all_patient_data:
                if surgery_date_str in all_patient_data[patient_id]:
                    diagnosis_code, left_or_right_eye, femto_yes_or_no = all_patient_data[patient_id][surgery_date_str]
                    MediLink_ConfigLoader.log("Found diagnosis data for Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

                    # Convert diagnosis code to Medisoft shorthand format.
                    medisoft_shorthand = diagnosis_to_medisoft.get(diagnosis_code, None)
                    if medisoft_shorthand is None and diagnosis_code:
                        medisoft_shorthand = _default_medisoft_shorthand(diagnosis_code)
                    MediLink_ConfigLoader.log("Converted diagnosis code to Medisoft shorthand: {}".format(medisoft_shorthand), level="DEBUG")

                    row['Default Diagnosis #1'] = medisoft_shorthand
                    updated_count += 1
                    MediLink_ConfigLoader.log("Updated row number {} with new diagnosis code.".format(row_num), level="INFO")
                else:
                    MediLink_ConfigLoader.log("No matching surgery date found for Patient ID: {} in row {}.".format(patient_id, row_num), level="INFO")
            else:
                MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")

        # Log total count of updated rows
        MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")

    except Exception as e:
        message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
        MediLink_ConfigLoader.log(message, level="ERROR")
        print(message)
579
-
580
def load_data_sources(config, crosswalk):
    """
    Loads the two historical lookups: Patient ID -> Insurance ID from MAPAT and
    Payer ID -> Patient IDs from Carol's CSVs.

    Returns:
    tuple: (patient_id_to_insurance_id, payer_id_to_patient_ids)

    Raises:
    ValueError: If either lookup comes back empty.
    """
    mapat_lookup = load_insurance_data_from_mapat(config, crosswalk)
    if not mapat_lookup:
        raise ValueError("Failed to load historical Patient ID to Insurance ID mappings from MAPAT.")

    # The CSV history is only loaded once the MAPAT lookup is known to be usable.
    payer_lookup = load_historical_payer_to_patient_mappings(config)
    if not payer_lookup:
        raise ValueError("Failed to load historical Carol's CSVs.")

    return mapat_lookup, payer_lookup
591
-
592
def map_payer_ids_to_insurance_ids(patient_id_to_insurance_id, payer_id_to_patient_ids):
    """
    Maps Payer IDs to Insurance IDs based on the historical mappings.

    Parameters:
    patient_id_to_insurance_id (dict): Patient ID -> Medisoft insurance ID.
    payer_id_to_patient_ids (dict): Payer ID -> iterable of Patient IDs.

    Returns:
    dict: Payer ID -> details dict with 'endpoint', 'medisoft_id' and
    'medisoft_medicare_id'. Payer IDs for which no patient resolved to an
    insurance ID are omitted. 'medisoft_id' is de-duplicated and sorted by
    string value so its order is the same on every run.
    """
    payer_id_to_details = {}
    for payer_id, patient_ids in payer_id_to_patient_ids.items():
        medisoft_ids = set()
        for patient_id in patient_ids:
            if patient_id in patient_id_to_insurance_id:
                medisoft_id = patient_id_to_insurance_id[patient_id]
                medisoft_ids.add(medisoft_id)
                MediLink_ConfigLoader.log("Added Medisoft ID {} for Patient ID {} and Payer ID {}".format(medisoft_id, patient_id, payer_id))
            else:
                MediLink_ConfigLoader.log("No matching Insurance ID found for Patient ID {}".format(patient_id))
        if medisoft_ids:
            payer_id_to_details[payer_id] = {
                "endpoint": "OPTUMEDI",  # TODO Default, to be refined via API poll. There are 2 of these defaults!
                # A bare list(set) has hash-dependent order; sorting makes the
                # "first" ID stable for code that reads medisoft_id[0].
                "medisoft_id": sorted(medisoft_ids, key=str),
                "medisoft_medicare_id": []  # Placeholder for future implementation
            }
    return payer_id_to_details
611
-
612
def load_insurance_data_from_mains(config):
    """
    Builds a mapping from MAINS insurance names to insurance IDs.

    Each fixed-width MAINS record contributes its 'MAINSNAME' as key and the
    line number reported by the reader as value; a repeated name keeps the
    value of its last occurrence.

    Args:
        config (dict): Accepted for interface compatibility. NOTE: the value
            passed in is not used; the MAINS path and slices are taken from
            get_cached_configuration() instead.

    Returns:
        dict: A dictionary mapping insurance names to insurance IDs.
    """
    # Use cached configuration to avoid repeated loading
    cached_config, cached_crosswalk = get_cached_configuration()

    # Retrieve MAINS path and slicing information from the configuration
    # TODO (Low) For secondary insurance, this needs to be pulling from the correct MAINS (there are 2)
    # TODO (Low) Performance: There probably needs to be a dictionary proxy for MAINS that gets updated.
    # Meh, this just has to be part of the new architecture plan where we make Medisoft a downstream
    # recipient from the db.
    # TODO (High) The Medisoft Medicare flag needs to be brought in here.
    mains_path = cached_config['MAINS_MED_PATH']
    mains_slices = cached_crosswalk['mains_mapping']['slices']

    mains_records = MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices)

    # Assuming line_number gives the correct insurance ID without needing adjustment
    return {record['MAINSNAME']: line_number for record, line_number in mains_records}
645
-
646
def load_insurance_data_from_mapat(config, crosswalk):
    """
    Build a lookup from patient ID to insurance ID out of the MAPAT file.

    Args:
        config (dict): Configuration object. Not read by this function; the
            MAPAT path is obtained from app_control instead.
        crosswalk (dict): Crosswalk data; crosswalk['mapat_mapping']['slices']
            supplies the fixed-width slice definitions for MAPAT.

    Returns:
        dict: A dictionary mapping patient IDs ('MAPATPXID') to insurance IDs
        ('MAPATINID'). A later record for the same patient replaces an earlier one.
    """
    mapat_records = MediLink_DataMgmt.read_general_fixed_width_data(
        app_control.get_mapat_med_path(),
        crosswalk['mapat_mapping']['slices'],
    )

    patient_id_to_insurance_id = {}
    # The line number that accompanies each record is not needed here.
    for record, _line_number in mapat_records:
        patient_id = record['MAPATPXID']
        patient_id_to_insurance_id[patient_id] = record['MAPATINID']

    return patient_id_to_insurance_id
671
-
672
def parse_z_dat(z_dat_path, config):  # Why is this in MediBot and not MediLink?
    """
    Parses the Z.dat file to map Patient IDs to Insurance Names using the provided fixed-width file format.

    Parsing is best-effort: a missing file or a parse failure is logged and
    whatever was collected up to that point is returned.

    Args:
        z_dat_path (str): Path to the Z.dat file.
        config (dict): Configuration object containing slicing information and other parameters.
            Its 'MediLink_Config' section is passed to the parser when present,
            otherwise the whole object is passed.

    Returns:
        dict: A dictionary mapping Patient IDs to Insurance Names.
    """
    patient_id_to_insurance_name = {}

    try:
        # Reading blocks of fixed-width data (up to 5 lines per record)
        for personal_info, insurance_info, service_info, service_info_2, service_info_3 in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
            # Parsing the data using slice definitions from the config
            parsed_data = MediLink_DataMgmt.parse_fixed_width_data(
                personal_info, insurance_info, service_info, service_info_2, service_info_3,
                config.get('MediLink_Config', config))

            # Extract Patient ID and Insurance Name from parsed data
            patient_id = parsed_data.get('PATID')
            insurance_name = parsed_data.get('INAME')

            # Only complete pairs are recorded; a later record for the same patient wins.
            if patient_id and insurance_name:
                patient_id_to_insurance_name[patient_id] = insurance_name
                MediLink_ConfigLoader.log("Mapped Patient ID {} to Insurance Name {}".format(patient_id, insurance_name), config, level="INFO")

    except FileNotFoundError:
        # Logged above INFO so a missing Z.dat is visible when logs are filtered by level.
        MediLink_ConfigLoader.log("File not found: {}".format(z_dat_path), config, level="WARNING")
    except Exception as e:
        # Deliberately broad: this stays best-effort, but the failure is reported as an error.
        MediLink_ConfigLoader.log("Failed to parse Z.dat: {}".format(str(e)), config, level="ERROR")

    return patient_id_to_insurance_name
705
-
706
def load_historical_payer_to_patient_mappings(config):
    """
    Loads historical mappings from multiple Carol's CSV files in a specified directory,
    mapping Payer IDs to sets of Patient IDs.

    Every '.csv' file in the directory that holds config['CSV_FILE_PATH'] is
    read. Rows lacking a non-empty 'Patient ID' or 'Ins1 Payer ID' are skipped.
    A file that cannot be processed is reported and the remaining files are
    still read.

    Args:
        config (dict): Configuration object containing the directory path for Carol's CSV files
                       and other necessary parameters.

    Returns:
        dict: A dictionary where each key is a Payer ID and the value is a set of Patient IDs.
    """
    directory_path = os.path.dirname(config['CSV_FILE_PATH'])
    payer_to_patient_ids = defaultdict(set)

    try:
        # Check if the directory exists
        if not os.path.isdir(directory_path):
            raise FileNotFoundError("Directory '{}' not found.".format(directory_path))

        # Loop through each file in the directory containing Carol's historical CSVs
        for filename in os.listdir(directory_path):
            if not filename.endswith('.csv'):
                continue
            file_path = os.path.join(directory_path, filename)
            try:
                patient_count = 0  # Counter for Patient IDs found in this CSV
                # newline='' lets the csv module handle line endings itself,
                # as the csv documentation requires.
                with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
                    for row in csv.DictReader(csvfile):
                        # DictReader fills short rows with None, so normalise to ''
                        # before stripping; otherwise one short row raises and
                        # discards the rest of the file.
                        patient_id = (row.get('Patient ID') or '').strip()
                        payer_id = (row.get('Ins1 Payer ID') or '').strip()
                        if not patient_id or not payer_id:
                            continue  # Skip rows where either value is missing or empty

                        payer_to_patient_ids[payer_id].add(patient_id)
                        patient_count += 1  # Increment the counter for each valid mapping

                # Log the accumulated count for this CSV file
                if patient_count > 0:
                    MediLink_ConfigLoader.log("CSV file '{}' has {} Patient IDs with Payer IDs.".format(filename, patient_count), level="DEBUG")
                else:
                    MediLink_ConfigLoader.log("CSV file '{}' is empty or does not have valid Patient ID or Payer ID mappings.".format(filename), level="DEBUG")
            except Exception as e:
                # Deliberately broad: one unreadable file must not stop the others.
                print("Error processing file {}: {}".format(filename, e))
                MediLink_ConfigLoader.log("Error processing file '{}': {}".format(filename, e), level="ERROR")
    except FileNotFoundError as e:
        print("Error: {}".format(e))

    if not payer_to_patient_ids:
        print("No historical mappings were generated.")

    return dict(payer_to_patient_ids)
760
-
761
- def capitalize_all_fields(csv_data):
762
- """
763
- Converts all text fields in the CSV data to uppercase.
764
-
765
- Parameters:
766
- csv_data (list of dict): The CSV data where each row is represented as a dictionary.
767
-
768
- Returns:
769
- None: The function modifies the csv_data in place.
770
- """
771
- for row in csv_data:
772
- for key, value in row.items():
773
- if isinstance(value, str):
774
- row[key] = value.upper()
775
- elif isinstance(value, datetime):
776
- # Keep datetime objects as they are
777
- pass
778
- elif value is not None:
779
- # Convert any other non-None values to string and then uppercase
1
+ #MediBot_Preprocessor_lib.py
2
+ from collections import OrderedDict, defaultdict
3
+ from datetime import datetime, timedelta
4
+ import os, csv, sys
5
+ import chardet # Ensure chardet is imported
6
+
7
+ # Add the parent directory of the project to the Python path
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
+
10
+ # Configuration cache to avoid repeated loading
11
+ _config_cache = None
12
+ _crosswalk_cache = None
13
+
14
+ # Attempt to import necessary modules, falling back if they are not found
15
+ try:
16
+ import MediLink_ConfigLoader
17
+ import MediLink_DataMgmt
18
+ except ImportError:
19
+ from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
20
+
21
+ try:
22
+ from MediBot_UI import app_control
23
+ from MediBot_docx_decoder import parse_docx
24
+ except ImportError:
25
+ from MediBot import MediBot_UI
26
+ app_control = MediBot_UI.app_control
27
+ from MediBot import MediBot_docx_decoder
28
+ parse_docx = MediBot_docx_decoder.parse_docx
29
+
30
class InitializationError(Exception):
    """Raised by initialize() when settings cannot be read from the config object."""

    def __init__(self, message):
        super().__init__(message)
        # Also exposed as an attribute so handlers can read the text directly.
        self.message = message
34
+
35
def initialize(config):
    """
    Publish selected config values as module-level globals.

    Sets AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping and page_end_markers from
    config, using an empty string, dict or list when a key is absent.
    field_mapping is always stored as an OrderedDict.

    Parameters:
    config (dict): Configuration object; must provide a .get() method.

    Raises:
    InitializationError: If reading a setting raises AttributeError (for
        example when config has no .get() method).
    """
    global AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping, page_end_markers

    # (setting name, fallback used when the key is missing from config)
    settings_with_fallbacks = (
        ('AHK_EXECUTABLE', ""),
        ('CSV_FILE_PATH', ""),
        ('field_mapping', {}),
        ('page_end_markers', []),
    )

    module_namespace = globals()
    for setting, fallback in settings_with_fallbacks:
        try:
            value = config.get(setting, fallback)
            if setting == 'field_mapping':
                value = OrderedDict(value)
        except AttributeError:
            raise InitializationError("Error: '{}' not found in config.".format(setting))
        module_namespace[setting] = value
50
+
51
def get_cached_configuration():
    """
    Return the (config, crosswalk) pair, loading it only when not yet cached.

    The pair is loaded through MediLink_ConfigLoader.load_configuration() and
    kept in module-level variables. A reload happens whenever either cached
    value is still None.
    """
    global _config_cache, _crosswalk_cache

    cache_is_filled = _config_cache is not None and _crosswalk_cache is not None
    if not cache_is_filled:
        _config_cache, _crosswalk_cache = MediLink_ConfigLoader.load_configuration()

    return _config_cache, _crosswalk_cache
61
+
62
def open_csv_for_editing(csv_file_path):
    """
    Open the CSV file with its associated application so the user can edit it.

    Uses os.startfile instead of a shell command built from the path. The path
    is therefore never interpreted by a shell, and a failure to open the file
    raises and is reported, which os.system never did.

    Parameters:
    csv_file_path (str): Path of the CSV file to open.

    Returns:
    None: Success or failure is reported on stdout only.
    """
    try:
        # os.startfile exists only on Windows; elsewhere report the failure
        # rather than silently doing nothing.
        startfile = getattr(os, 'startfile', None)
        if startfile is None:
            raise OSError("opening a file with its associated application is only supported on Windows")
        startfile(csv_file_path)
        print("After saving the revised CSV, please re-run MediBot.")
    except Exception as e:
        print("Failed to open CSV file:", e)
69
+
70
+ # Function to clean the headers
71
def clean_header(headers):
    """
    Cleans the header strings by removing unwanted characters and trimming whitespace.

    Only alphanumeric characters, whitespace, hyphens and underscores are
    kept. Trimming happens after the unwanted characters are removed, so
    whitespace that sat next to a removed character (for example a stray quote)
    is trimmed as well.

    Parameters:
    headers (list of str): The original header strings.

    Returns:
    list of str: The cleaned header strings.

    Raises:
    ValueError: If no cleaned header equals 'Surgery Date'.
    """
    allowed_punctuation = ('-', '_')
    cleaned_headers = []

    for header in headers:
        # Remove unwanted characters while keeping spaces, alphanumeric characters, hyphens, and underscores
        kept_characters = ''.join(
            char for char in header
            if char.isalnum() or char.isspace() or char in allowed_punctuation
        )
        # Strip last so whitespace exposed by the removal above does not survive.
        cleaned_headers.append(kept_characters.strip())

    # Log the original and cleaned headers for debugging
    MediLink_ConfigLoader.log("Original headers: {}".format(headers), level="INFO")
    MediLink_ConfigLoader.log("Cleaned headers: {}".format(cleaned_headers), level="INFO")

    # Check if 'Surgery Date' is in the cleaned headers
    if 'Surgery Date' not in cleaned_headers:
        MediLink_ConfigLoader.log("WARNING: 'Surgery Date' header not found after cleaning.", level="WARNING")
        print("WARNING: 'Surgery Date' header not found after cleaning.")
        raise ValueError("Error: 'Surgery Date' header not found after cleaning.")

    return cleaned_headers
101
+
102
+ # Function to load and process CSV data
103
def load_csv_data(csv_file_path):
    """
    Load the CSV file into a list of row dictionaries keyed by cleaned headers.

    The file encoding is detected with chardet, the header row is cleaned with
    clean_header(), and every data row is re-keyed by the cleaned header names.

    Parameters:
    csv_file_path (str): Path of the CSV file to read.

    Returns:
    list of dict: One dictionary per data row.

    Raises:
    ValueError: Propagated from clean_header() when 'Surgery Date' is missing,
        including when the file has no header row at all.
    SystemExit: If the file does not exist or cannot be read.
    """
    try:
        # Check if the file exists
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError("***Error: CSV file '{}' not found.".format(csv_file_path))

        # Detect the file encoding
        with open(csv_file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']
        confidence = result['confidence']
        print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))

        # Read the CSV file with the detected encoding. newline='' lets the csv
        # module do its own newline handling, as the csv documentation requires.
        with open(csv_file_path, 'r', encoding=encoding, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            # An empty file has fieldnames None; treat it as "no headers" so
            # clean_header reports the missing 'Surgery Date' instead of a TypeError.
            original_headers = reader.fieldnames or []
            cleaned_headers = clean_header(original_headers)

            # Pair each cleaned header with the original header it came from (computed once).
            header_pairs = list(zip(cleaned_headers, original_headers))

            csv_data = [
                dict((cleaned, row[original]) for cleaned, original in header_pairs)
                for row in reader
            ]

            return csv_data  # Return a list of dictionaries
    except FileNotFoundError as e:
        print(e)  # Print the informative error message
        print("Hint: Check if CSV file is located in the expected directory or specify a different path in config file.")
        print("Please correct the issue and re-run MediBot.")
        sys.exit(1)  # Halt the script
    except IOError as e:
        print("Error reading CSV file: {}. Please check the file path and permissions.".format(e))
        sys.exit(1)  # Halt the script in case of other IO errors
146
+
147
+ # CSV Pre-processor Helper functions
148
def add_columns(csv_data, column_headers):
    """
    Add one or more empty columns to every row of the CSV data.

    A column that already exists in a row is reset to an empty string.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    column_headers (list of str or str): The header, or list of headers, to add to each row.

    Returns:
    None: The function modifies the csv_data in place.

    Raises:
    ValueError: If column_headers is neither a list nor a string.
    """
    if isinstance(column_headers, str):
        new_columns = [column_headers]  # a single header is treated as a one-item list
    elif isinstance(column_headers, list):
        new_columns = column_headers
    else:
        raise ValueError("column_headers should be a list or a string")

    for record in csv_data:
        for column in new_columns:
            record[column] = ''  # every new cell starts out empty
167
+
168
+ # Extracting the list to a variable for future refactoring:
169
def filter_rows(csv_data):
    """
    Drop rows that have no Patient ID or whose Primary Insurance is excluded.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
    None: The function modifies the csv_data in place.
    """
    # TODO: This should be handled in the crosswalk.
    excluded_insurance = {'AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'}

    kept_rows = []
    for row in csv_data:
        if not row.get('Patient ID'):
            continue  # missing or empty Patient ID
        if row.get('Primary Insurance') in excluded_insurance:
            continue  # insurer is on the exclusion list
        kept_rows.append(row)

    # Slice assignment keeps the caller's list object and replaces its contents.
    csv_data[:] = kept_rows
173
+
174
def convert_surgery_date(csv_data):
    """
    Convert each row's 'Surgery Date' from an 'MM/DD/YYYY' string to a datetime.

    The conversion is idempotent: a value that is already a datetime is left
    untouched, so calling this function again on converted data is safe.
    Surrounding whitespace is removed before parsing. A missing, empty or
    unparseable value is replaced by datetime.min, which the rest of the module
    treats as "no valid date".

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
    None: The function modifies the csv_data in place.
    """
    for row in csv_data:
        raw_value = row.get('Surgery Date', '')

        # Already converted by an earlier call; strptime would raise TypeError on it.
        if isinstance(raw_value, datetime):
            continue

        # Cleaning step: tolerate padding around an otherwise valid date string.
        if isinstance(raw_value, str):
            raw_value = raw_value.strip()

        if not raw_value:
            MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if empty
            print("Surgery Date not found for row: {}".format(row))
            continue

        try:
            row['Surgery Date'] = datetime.strptime(raw_value, '%m/%d/%Y')
        except (ValueError, TypeError) as e:
            # ValueError: wrong format. TypeError: value is not a string.
            MediLink_ConfigLoader.log("Error parsing Surgery Date '{}': {} for row: {}".format(raw_value, e, row), level="ERROR")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if parsing fails
188
+
189
def sort_and_deduplicate(csv_data):
    """
    Keep one row per Patient ID and sort the result.

    For a Patient ID that appears more than once, the row with the earliest
    'Surgery Date' is kept (the first one seen wins a tie). The surviving rows
    are ordered by 'Surgery Date', then by the stripped 'Patient Last' value.

    Parameters:
    csv_data (list of dict): The CSV data; 'Surgery Date' values must be mutually comparable.

    Returns:
    None: The function modifies the csv_data in place.
    """
    earliest_by_patient = {}

    for row in csv_data:
        patient_id = row.get('Patient ID')
        kept = earliest_by_patient.get(patient_id)
        # First sighting of this patient, or a strictly earlier surgery than the kept row.
        if kept is None or row['Surgery Date'] < kept['Surgery Date']:
            earliest_by_patient[patient_id] = row

    def _sort_key(row):
        return (row['Surgery Date'], row.get('Patient Last', '').strip())

    # TODO Does this need to be sorted twice? once before and once after?
    ordered_rows = list(earliest_by_patient.values())
    ordered_rows.sort(key=_sort_key)
    csv_data[:] = ordered_rows
206
+
207
+ # TODO: Consider adding an option in the config to sort based on Surgery Schedules when available.
208
+ # If no schedule is available, the current sorting strategy will be used.
209
+
210
def combine_fields(csv_data):
    """
    Format the surgery date and build the combined name and street fields.

    For every row:
      * 'Surgery Date' (a datetime) becomes an 'MM/DD/YYYY' string, or '' when empty.
      * 'Patient Name' becomes 'Last, First M': the words within the first and
        last names are joined by underscores and the middle name is reduced
        to its initial.
      * 'Patient Street' becomes the non-empty address lines joined by a space.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
    None: The function modifies the csv_data in place.
    """
    for row in csv_data:
        # Safely handle the 'Surgery Date' conversion
        surgery_date = row.get('Surgery Date')
        row['Surgery Date'] = surgery_date.strftime('%m/%d/%Y') if surgery_date else ''

        # split() already discards surrounding and repeated whitespace.
        first_name = '_'.join(row.get('Patient First', '').split())
        last_name = '_'.join(row.get('Patient Last', '').split())

        # Take only the first character, or '' when there is no middle name.
        # Slicing keeps a middle name that is already a single initial, which
        # the previous `len(...) > 1` test discarded.
        middle_initial = row.get('Patient Middle', '').strip()[:1]

        # Comma between last and first, space before middle
        full_name = ', '.join(filter(None, [last_name, first_name]))
        if middle_initial:
            full_name += ' ' + middle_initial
        row['Patient Name'] = full_name

        address1 = row.get('Patient Address1', '').strip()
        address2 = row.get('Patient Address2', '').strip()
        row['Patient Street'] = ' '.join(filter(None, [address1, address2]))  # Join non-empty addresses
225
+
226
def apply_replacements(csv_data, crosswalk):
    """
    Apply the crosswalk's 'csv_replacements' value substitutions to each row.

    Replacements are tried in the mapping's iteration order against
    'Patient SSN', 'Primary Insurance' and 'Ins1 Payer ID', in that order.
    Processing of a row stops at the first match, so at most one field per
    row is replaced.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    crosswalk (dict): Crosswalk data; 'csv_replacements' maps old values to new values.

    Returns:
    None: The function modifies the csv_data in place.
    """
    replacements = crosswalk.get('csv_replacements', {})
    candidate_fields = ('Patient SSN', 'Primary Insurance', 'Ins1 Payer ID')

    for row in csv_data:
        # Lazily search (replacement, field) combinations; next() stops at the first hit.
        first_hit = next(
            (
                (field, new_value)
                for old_value, new_value in replacements.items()
                for field in candidate_fields
                if row.get(field) == old_value
            ),
            None,
        )
        if first_hit is not None:
            field, new_value = first_hit
            row[field] = new_value
242
+
243
+ import difflib
244
+ from collections import defaultdict
245
+
246
def find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names):
    """
    Pick the Medisoft ID whose MAINS name is most similar to an insurance name.

    Names are compared after removing digits and upper-casing, using
    difflib.SequenceMatcher. No minimum similarity is required: the candidate
    with the highest ratio above zero is returned, and the first one found
    wins a tie.

    Parameters:
    - insurance_name (str): The insurance name from the CSV row.
    - medisoft_ids (list): List of Medisoft IDs associated with the Payer ID.
    - medisoft_to_mains_names (dict): Mapping from Medisoft ID to list of MAINS names.

    Returns:
    - The best matching element of medisoft_ids (returned as stored, not
      converted), or None if no candidate name had a ratio above zero.
    """
    def _normalize(name):
        # Drop digits and ignore case so numbering suffixes do not affect the match.
        return ''.join(c for c in name if not c.isdigit()).upper()

    target_name = _normalize(insurance_name)
    top_ratio = 0
    top_id = None

    for candidate_id in medisoft_ids:
        for mains_name in medisoft_to_mains_names.get(candidate_id, []):
            candidate_name = _normalize(mains_name)

            MediLink_ConfigLoader.log("Processing Medisoft ID '{}': Comparing processed insurance '{}' with processed mains '{}'.".format(candidate_id, target_name, candidate_name), level="DEBUG")

            ratio = difflib.SequenceMatcher(None, target_name, candidate_name).ratio()

            MediLink_ConfigLoader.log("Match ratio for Medisoft ID '{}': {:.2f}".format(candidate_id, ratio), level="DEBUG")

            if ratio <= top_ratio:
                continue  # not better than the current best

            top_ratio, top_id = ratio, candidate_id
            MediLink_ConfigLoader.log("New best match found: Medisoft ID '{}' with match ratio {:.2f}".format(top_id, top_ratio), level="DEBUG")

    MediLink_ConfigLoader.log("Final best match ratio: {:.2f} for Medisoft ID '{}'".format(top_ratio, top_id), level="DEBUG")

    # No threshold applied, return the best match found
    return top_id
291
+
292
def NEW_update_insurance_ids(csv_data, config, crosswalk):
    """
    Updates the 'Ins1 Insurance ID' field in each row of csv_data based on the crosswalk and MAINS data.

    For each row with a non-empty 'Ins1 Payer ID':
      * no Medisoft IDs in the crosswalk -> make sure a placeholder entry exists
        for the payer (an existing entry keeps its other fields) and leave the row alone;
      * exactly one Medisoft ID -> assign it (as int, or None if it is not numeric);
      * several Medisoft IDs -> fuzzy-match 'Primary Insurance' against the MAINS
        names and assign the best match, defaulting to the first ID.

    Parameters:
    - csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    - config (dict): Configuration object containing necessary paths and parameters.
    - crosswalk (dict): Crosswalk data containing mappings between Payer IDs and Medisoft IDs.
      May gain placeholder entries under 'payer_id'.

    Returns:
    - None: The function modifies the csv_data in place.
    """
    processed_payer_ids = set()  # Track processed Payer IDs (used for logging only)
    MediLink_ConfigLoader.log("Starting update of insurance IDs.", level="INFO")

    # Load MAINS data to get mapping from Medisoft ID to MAINS names
    insurance_to_id = load_insurance_data_from_mains(config)  # Assuming it returns a dict mapping insurance names to IDs
    MediLink_ConfigLoader.log("Loaded MAINS data for insurance to ID mapping.", level="DEBUG")

    # Invert the mapping to get Medisoft ID to MAINS names
    medisoft_to_mains_names = defaultdict(list)
    for insurance_name, medisoft_id in insurance_to_id.items():
        medisoft_to_mains_names[medisoft_id].append(insurance_name)

    # enumerate supplies the row number directly; csv_data.index(row) was O(n)
    # per row and reported the first equal row when rows are duplicated.
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row with Ins1 Payer ID: '{}'.".format(ins1_payer_id), level="DEBUG")

        if not ins1_payer_id:
            continue

        # Mark this Payer ID as processed
        if ins1_payer_id not in processed_payer_ids:
            processed_payer_ids.add(ins1_payer_id)  # Add to set
            MediLink_ConfigLoader.log("Marked Payer ID '{}' as processed.".format(ins1_payer_id), level="DEBUG")

        # Retrieve Medisoft IDs for the current Payer ID
        medisoft_ids = crosswalk.get('payer_id', {}).get(ins1_payer_id, {}).get('medisoft_id', [])
        MediLink_ConfigLoader.log("Retrieved Medisoft IDs for Payer ID '{}': {}".format(ins1_payer_id, medisoft_ids), level="DEBUG")

        if not medisoft_ids:
            MediLink_ConfigLoader.log("No Medisoft IDs available for Payer ID '{}', creating placeholder entry.".format(ins1_payer_id), level="WARNING")
            # Fill in only what is missing: an entry that already exists may
            # carry an endpoint or Medicare IDs that must not be wiped.
            payer_entry = crosswalk.setdefault('payer_id', {}).setdefault(ins1_payer_id, {})
            if not payer_entry.get('medisoft_id'):
                payer_entry['medisoft_id'] = []  # Placeholder for future Medisoft IDs
            payer_entry.setdefault('medisoft_medicare_id', [])  # Placeholder for future Medicare IDs
            payer_entry.setdefault('endpoint', None)  # Placeholder for future endpoint
            continue  # Skip further processing for this Payer ID

        # If only one Medisoft ID is associated, assign it directly
        if len(medisoft_ids) == 1:
            try:
                medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = medisoft_id
                MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, row_number, ins1_payer_id), level="DEBUG")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
            continue  # Move to the next row

        # If multiple Medisoft IDs are associated, perform fuzzy matching
        insurance_name = row.get('Primary Insurance', '').strip()
        if not insurance_name:
            MediLink_ConfigLoader.log("Row with Payer ID '{}' missing 'Primary Insurance', skipping assignment.".format(ins1_payer_id), level="WARNING")
            continue  # Skip if insurance name is missing

        best_medisoft_id = find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names)

        if best_medisoft_id:
            row['Ins1 Insurance ID'] = best_medisoft_id
            MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Payer ID '{}' based on fuzzy match.".format(best_medisoft_id, ins1_payer_id), level="INFO")
        else:
            # Default to the first Medisoft ID if no good match is found
            try:
                default_medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = default_medisoft_id
                MediLink_ConfigLoader.log("No suitable match found. Defaulted to Medisoft ID '{}' for Payer ID '{}'.".format(default_medisoft_id, ins1_payer_id), level="INFO")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting default Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
373
+
374
def update_insurance_ids(csv_data, config, crosswalk):
    """
    Set 'Ins1 Insurance ID' on every row from the crosswalk's payer ID entries.

    For each payer ID in the crosswalk, the first non-empty Medicare ID and the
    first non-empty regular Medisoft ID are pre-computed as integers. A row gets
    the Medicare ID when one exists, otherwise the regular ID, otherwise None.
    A payer ID unknown to the crosswalk gets a placeholder entry added.

    Parameters:
    - csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    - config (dict): Configuration object; not read by this function.
    - crosswalk (dict): Crosswalk data; may gain placeholder entries under 'payer_id'.

    Returns:
    - None: The function modifies csv_data (and possibly crosswalk) in place.
    """
    MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")

    def _first_id_as_int(raw_ids, payer_id):
        """Return (non-empty IDs, first of them as int, or None if absent or not numeric)."""
        usable_ids = [candidate for candidate in raw_ids if candidate] if raw_ids else []
        if not usable_ids:
            return usable_ids, None
        try:
            return usable_ids, int(usable_ids[0])
        except (TypeError, ValueError) as e:
            # One malformed crosswalk ID must not abort the update for every row.
            MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(usable_ids[0], payer_id, e), level="ERROR")
            return usable_ids, None

    # PERFORMANCE FIX: Pre-build optimized lookup dictionaries for both regular and Medicare IDs
    # This reduces Medicare processing overhead by building lookups once instead of repeated processing
    payer_id_to_medisoft = {}
    payer_id_to_medicare = {}
    MediLink_ConfigLoader.log("Initialized optimized lookup dictionaries for Medicare and regular IDs.", level="DEBUG")

    # Build both lookup dictionaries simultaneously to avoid multiple iterations
    for payer_id, details in crosswalk.get('payer_id', {}).items():
        medisoft_ids, payer_id_to_medisoft[payer_id] = _first_id_as_int(details.get('medisoft_id', []), payer_id)
        medicare_ids, payer_id_to_medicare[payer_id] = _first_id_as_int(details.get('medisoft_medicare_id', []), payer_id)

        MediLink_ConfigLoader.log("Processed Payer ID '{}': Regular IDs: {}, Medicare IDs: {}".format(
            payer_id, medisoft_ids, medicare_ids), level="DEBUG")

    # Single pass through the CSV data. enumerate supplies the row number;
    # csv_data.index(row) rescanned the list for every row and returned the
    # first equal row when rows are duplicated.
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_number, ins1_payer_id), level="DEBUG")

        # Try Medicare ID first, then fall back to regular ID
        insurance_id = (payer_id_to_medicare.get(ins1_payer_id) or
                        payer_id_to_medisoft.get(ins1_payer_id))

        if insurance_id is None and ins1_payer_id not in payer_id_to_medisoft:
            # Add placeholder entry for new payer ID (preserve original functionality)
            payer_id_to_medisoft[ins1_payer_id] = None
            payer_id_to_medicare[ins1_payer_id] = None
            crosswalk.setdefault('payer_id', {})[ins1_payer_id] = {
                'medisoft_id': [],  # Placeholder for future Medisoft IDs
                'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                'endpoint': None  # Placeholder for future endpoint
            }
            MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")

        # Assign the resolved insurance ID to the row
        row['Ins1 Insurance ID'] = insurance_id
        MediLink_ConfigLoader.log("Assigned Insurance ID '{}' to row with Ins1 Payer ID '{}'.".format(insurance_id, ins1_payer_id), level="DEBUG")
423
+
424
def update_procedure_codes(csv_data, crosswalk):
    """
    Fill each row's 'Procedure Code' from its 'Default Diagnosis #1' shorthand.

    The Medisoft shorthand is translated to a diagnosis code through the
    reversed 'diagnosis_to_medisoft' mapping, and the diagnosis code to a
    procedure code through the reversed 'procedure_to_diagnosis' mapping. A
    row whose code cannot be resolved gets "Unknown". A row that raises
    during processing is logged and left unchanged.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    crosswalk (dict): Crosswalk data providing the two mappings.

    Returns:
    bool: Always True.
    """
    # Reverse lookup: Medisoft shorthand -> diagnosis code.
    # BUG We need to be careful here in case we decide we need to change the crosswalk data specifically with regard to the T8/H usage.
    medisoft_to_diagnosis = {}
    for diagnosis, shorthand in crosswalk.get('diagnosis_to_medisoft', {}).items():
        medisoft_to_diagnosis[shorthand] = diagnosis

    # Reverse lookup: diagnosis code -> procedure code.
    diagnosis_to_procedure = {}
    for procedure, diagnoses in crosswalk.get('procedure_to_diagnosis', {}).items():
        for diagnosis in diagnoses:
            diagnosis_to_procedure[diagnosis] = procedure

    rows_updated = 0

    for row_num, row in enumerate(csv_data, start=1):
        try:
            shorthand = row.get('Default Diagnosis #1', '').strip()
            diagnosis = medisoft_to_diagnosis.get(shorthand)
            procedure = diagnosis_to_procedure.get(diagnosis) if diagnosis else None

            if procedure:
                row['Procedure Code'] = procedure
                rows_updated += 1
            else:
                row['Procedure Code'] = "Unknown"  # Or handle as appropriate
        except Exception as e:
            MediLink_ConfigLoader.log("In update_procedure_codes, Error processing row {}: {}".format(row_num, e), level="ERROR")

    # Log total count of updated rows
    MediLink_ConfigLoader.log("Total {} 'Procedure Code' rows updated.".format(rows_updated), level="INFO")

    return True
461
+
462
+ def update_diagnosis_codes(csv_data):
463
+ try:
464
+ # Use cached configuration instead of loading repeatedly
465
+ config, crosswalk = get_cached_configuration()
466
+
467
+ # Extract the local storage path from the configuration
468
+ local_storage_path = config['MediLink_Config']['local_storage_path']
469
+
470
+ # Initialize a dictionary to hold diagnosis codes from all DOCX files
471
+ all_patient_data = {}
472
+
473
+ # Convert surgery dates in CSV data
474
+ convert_surgery_date(csv_data)
475
+
476
+ # Extract all valid surgery dates from csv_data
477
+ surgery_dates = [row['Surgery Date'] for row in csv_data if row['Surgery Date'] != datetime.min]
478
+
479
+ if not surgery_dates:
480
+ raise ValueError("No valid surgery dates found in csv_data.")
481
+
482
+ # Determine the minimum and maximum surgery dates
483
+ min_surgery_date = min(surgery_dates)
484
+ max_surgery_date = max(surgery_dates)
485
+
486
+ # Apply a ±8-day margin to the surgery dates... Increased from 5 days.
487
+ margin = timedelta(days=8)
488
+ threshold_start = min_surgery_date - margin
489
+ threshold_end = max_surgery_date + margin
490
+
491
+ # TODO (Low) This is a bad idea. We need a better way to handle this because it leaves
492
+ # us with a situation where if we take 'too long' to download the DOCX files, it will presume that the DOCX files are out of range because
493
+ # the modfied date is a bad proxy for the date of the surgery which would be contained inside the DOCX file. The processing overhead for extracting the
494
+ # date of the surgery from the DOCX file is non-trivial and computationally expensive so we need a smarter way to handle this.
495
+
496
+ MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")
497
+
498
+ # Gather all relevant DOCX files in the specified directory
499
+ # Optimize by combining file gathering and filtering in one pass
500
+ valid_files = []
501
+ try:
502
+ for filename in os.listdir(local_storage_path):
503
+ if filename.endswith(".docx"):
504
+ filepath = os.path.join(local_storage_path, filename)
505
+ # Check modification time only once per file
506
+ try:
507
+ mtime = os.path.getmtime(filepath)
508
+ if threshold_start <= datetime.fromtimestamp(mtime) <= threshold_end:
509
+ valid_files.append(filepath)
510
+ except (OSError, ValueError):
511
+ # Skip files with invalid modification times
512
+ continue
513
+ except OSError:
514
+ MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
515
+ return
516
+
517
+ # Process valid DOCX files
518
+ for filepath in valid_files:
519
+ MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
520
+ try:
521
+ patient_data = parse_docx(filepath, surgery_dates) # Pass surgery_dates to parse_docx
522
+ for patient_id, service_dates in patient_data.items():
523
+ if patient_id not in all_patient_data:
524
+ all_patient_data[patient_id] = {}
525
+ for date_of_service, diagnosis_data in service_dates.items():
526
+ all_patient_data[patient_id][date_of_service] = diagnosis_data
527
+ except Exception as e:
528
+ MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")
529
+
530
+ # Log if no valid files were found
531
+ if not valid_files:
532
+ MediLink_ConfigLoader.log("No valid DOCX files found within the modification time threshold.", level="INFO")
533
+
534
+ # Debug logging for all_patient_data
535
+ MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")
536
+
537
+ # Extract patient IDs from csv_data for efficient matching
538
+ patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
539
+
540
+ # Check if any patient data was collected
541
+ if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
542
+ MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
543
+ return # Exit the function early if no data is available
544
+
545
+ # Get Medisoft shorthand dictionary from crosswalk.
546
+ diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})
547
+
548
+ # Initialize counter for updated rows
549
+ updated_count = 0
550
+
551
+ # Update the "Default Diagnosis #1" column in the CSV data
552
+ for row_num, row in enumerate(csv_data, start=1):
553
+ patient_id = row.get('Patient ID', '').strip()
554
+ if patient_id not in patient_ids_in_csv:
555
+ continue # Skip rows that do not match any patient ID
556
+
557
+ MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
558
+ surgery_date = row.get('Surgery Date', '')
559
+
560
+ # Convert surgery_date to string format for lookup
561
+ if surgery_date != datetime.min:
562
+ surgery_date_str = surgery_date.strftime("%m-%d-%Y")
563
+ else:
564
+ surgery_date_str = ''
565
+
566
+ MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")
567
+
568
+ if patient_id in all_patient_data:
569
+ if surgery_date_str in all_patient_data[patient_id]:
570
+ diagnosis_code, left_or_right_eye, femto_yes_or_no = all_patient_data[patient_id][surgery_date_str]
571
+ MediLink_ConfigLoader.log("Found diagnosis data for Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")
572
+
573
+ # Convert diagnosis code to Medisoft shorthand format.
574
+ medisoft_shorthand = diagnosis_to_medisoft.get(diagnosis_code, None)
575
+ if medisoft_shorthand is None and diagnosis_code:
576
+ defaulted_code = diagnosis_code.lstrip('H').lstrip('T8').replace('.', '')[-5:]
577
+ medisoft_shorthand = defaulted_code
578
+ MediLink_ConfigLoader.log("Converted diagnosis code to Medisoft shorthand: {}".format(medisoft_shorthand), level="DEBUG")
579
+
580
+ row['Default Diagnosis #1'] = medisoft_shorthand
581
+ updated_count += 1
582
+ MediLink_ConfigLoader.log("Updated row number {} with new diagnosis code.".format(row_num), level="INFO")
583
+ else:
584
+ MediLink_ConfigLoader.log("No matching surgery date found for Patient ID: {} in row {}.".format(patient_id, row_num), level="INFO")
585
+ else:
586
+ MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")
587
+
588
+ # Log total count of updated rows
589
+ MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")
590
+
591
+ except Exception as e:
592
+ message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
593
+ MediLink_ConfigLoader.log(message, level="ERROR")
594
+ print(message)
595
+
596
def load_data_sources(config, crosswalk):
    """
    Load both historical lookup tables needed downstream.

    Args:
        config (dict): Configuration object forwarded to both loaders.
        crosswalk (dict): Crosswalk data forwarded to the MAPAT loader.

    Returns:
        tuple: (patient ID -> insurance ID mapping from MAPAT,
                payer ID -> patient IDs mapping from Carol's CSVs).

    Raises:
        ValueError: If either loader returns an empty/falsy result.
    """
    # MAPAT is loaded first; the CSV loader is not invoked at all when MAPAT yields nothing.
    mapat_lookup = load_insurance_data_from_mapat(config, crosswalk)
    if not mapat_lookup:
        raise ValueError("Failed to load historical Patient ID to Insurance ID mappings from MAPAT.")

    csv_lookup = load_historical_payer_to_patient_mappings(config)
    if not csv_lookup:
        raise ValueError("Failed to load historical Carol's CSVs.")

    return mapat_lookup, csv_lookup
607
+
608
def map_payer_ids_to_insurance_ids(patient_id_to_insurance_id, payer_id_to_patient_ids):
    """
    Build the per-payer detail records from the historical mappings.

    Args:
        patient_id_to_insurance_id (dict): Patient ID -> insurance (Medisoft) ID.
        payer_id_to_patient_ids (dict): Payer ID -> iterable of Patient IDs.

    Returns:
        dict: Payer ID -> {"endpoint", "medisoft_id", "medisoft_medicare_id"}.
              Payers for which no patient resolved to an insurance ID are omitted.
    """
    details_by_payer = {}

    for payer_id, patient_ids in payer_id_to_patient_ids.items():
        # A set de-duplicates insurance IDs shared by several patients of the same payer.
        resolved_ids = set()

        for patient_id in patient_ids:
            if patient_id not in patient_id_to_insurance_id:
                MediLink_ConfigLoader.log("No matching Insurance ID found for Patient ID {}".format(patient_id))
                continue

            insurance_id = patient_id_to_insurance_id[patient_id]
            resolved_ids.add(insurance_id)
            MediLink_ConfigLoader.log("Added Medisoft ID {} for Patient ID {} and Payer ID {}".format(insurance_id, patient_id, payer_id))

        if not resolved_ids:
            continue

        details_by_payer[payer_id] = {
            "endpoint": "OPTUMEDI",  # TODO Default, to be refined via API poll. There are 2 of these defaults!
            "medisoft_id": list(resolved_ids),
            "medisoft_medicare_id": [],  # Placeholder for future implementation
        }

    return details_by_payer
627
+
628
def load_insurance_data_from_mains(config):
    """
    Build an insurance name -> insurance ID lookup from the MAINS file.

    The crosswalk update process uses this lookup to associate payer IDs with insurance IDs.

    Args:
        config (dict): Kept for interface compatibility. The value passed in is not used:
            the cached configuration returned by get_cached_configuration() is read instead.

    Returns:
        dict: A dictionary mapping insurance names to insurance IDs.
    """
    # Use the cached configuration/crosswalk to avoid repeated loading.
    cached_config, cached_crosswalk = get_cached_configuration()

    # TODO (Low) For secondary insurance, this needs to be pulling from the correct MAINS (there are 2)
    # TODO (Low) Performance: There probably needs to be a dictionary proxy for MAINS that gets updated.
    # Meh, this just has to be part of the new architecture plan where we make Medisoft a downstream
    # recipient from the db.
    # TODO (High) The Medisoft Medicare flag needs to be brought in here.
    mains_path = cached_config['MAINS_MED_PATH']
    mains_slices = cached_crosswalk['mains_mapping']['slices']

    fixed_width_records = MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices)

    # Assuming line_number gives the correct insurance ID without needing adjustment.
    return {record['MAINSNAME']: line_number for record, line_number in fixed_width_records}
661
+
662
def load_insurance_data_from_mapat(config, crosswalk):
    """
    Build a patient ID -> insurance ID lookup from the MAPAT file.

    Args:
        config (dict): Configuration object. Not read by this function; the MAPAT path
            is obtained from app_control.get_mapat_med_path().
        crosswalk (dict): Crosswalk data; crosswalk['mapat_mapping']['slices'] supplies
            the fixed-width slice definitions used to read MAPAT.

    Returns:
        dict: A dictionary mapping patient IDs to insurance IDs.
    """
    mapat_path = app_control.get_mapat_med_path()
    mapat_slices = crosswalk['mapat_mapping']['slices']

    lookup = {}
    # The second element yielded with each record is not needed here.
    for fields, _unused in MediLink_DataMgmt.read_general_fixed_width_data(mapat_path, mapat_slices):
        patient_id = fields['MAPATPXID']
        insurance_id = fields['MAPATINID']
        # A later record for the same patient ID overwrites the earlier one.
        lookup[patient_id] = insurance_id

    return lookup
687
+
688
def parse_z_dat(z_dat_path, config):  # Why is this in MediBot and not MediLink?
    """
    Parses the Z.dat file to map Patient IDs to Insurance Names using the provided fixed-width file format.

    Failures never propagate to the caller: a missing file is logged at INFO and any
    other parsing failure is logged at ERROR. In both cases whatever was mapped before
    the failure is returned.

    Args:
        z_dat_path (str): Path to the Z.dat file.
        config (dict): Configuration object containing slicing information and other parameters.

    Returns:
        dict: A dictionary mapping Patient IDs to Insurance Names.
    """
    patient_id_to_insurance_name = {}

    try:
        # Pass the 'MediLink_Config' sub-dict when present, otherwise the config itself.
        # Looked up once instead of once per record.
        slice_config = config.get('MediLink_Config', config)

        # Reading blocks of fixed-width data (up to 5 lines per record)
        for personal_info, insurance_info, service_info, service_info_2, service_info_3 in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
            parsed_data = MediLink_DataMgmt.parse_fixed_width_data(
                personal_info, insurance_info, service_info, service_info_2, service_info_3, slice_config
            )

            patient_id = parsed_data.get('PATID')
            insurance_name = parsed_data.get('INAME')

            # Records missing either value are skipped.
            if patient_id and insurance_name:
                patient_id_to_insurance_name[patient_id] = insurance_name
                MediLink_ConfigLoader.log("Mapped Patient ID {} to Insurance Name {}".format(patient_id, insurance_name), config, level="INFO")

    except FileNotFoundError:
        MediLink_ConfigLoader.log("File not found: {}".format(z_dat_path), config, level="INFO")
    except Exception as e:
        # A parse failure is a real error; log it at ERROR like the other failure paths in this module.
        MediLink_ConfigLoader.log("Failed to parse Z.dat: {}".format(str(e)), config, level="ERROR")

    return patient_id_to_insurance_name
721
+
722
def load_historical_payer_to_patient_mappings(config):
    """
    Loads historical mappings from multiple Carol's CSV files in a specified directory,
    mapping Payer IDs to sets of Patient IDs.

    Every '*.csv' file in the directory that contains config['CSV_FILE_PATH'] is read.
    Rows lacking a non-empty 'Patient ID' or 'Ins1 Payer ID' are skipped. A failure in
    one file is reported and does not stop the remaining files from being processed.

    Args:
        config (dict): Configuration object containing the directory path for Carol's CSV files
                       and other necessary parameters.

    Returns:
        dict: A dictionary where each key is a Payer ID and the value is a set of Patient IDs.
              Empty when the directory is missing or no valid mappings were found.
    """
    directory_path = os.path.dirname(config['CSV_FILE_PATH'])
    payer_to_patient_ids = defaultdict(set)

    try:
        if not os.path.isdir(directory_path):
            raise FileNotFoundError("Directory '{}' not found.".format(directory_path))

        for filename in os.listdir(directory_path):
            if not filename.endswith('.csv'):
                continue
            file_path = os.path.join(directory_path, filename)

            try:
                # 'utf-8-sig' also reads plain UTF-8 but strips a leading BOM, which would
                # otherwise be glued to the first header name and hide that column.
                # newline='' is what the csv module requires for correct quoted-newline handling.
                with open(file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
                    patient_count = 0  # Valid mappings found in this CSV
                    for row in csv.DictReader(csvfile):
                        # DictReader fills fields missing from a short row with None, so
                        # normalise None/absent to '' before stripping instead of crashing
                        # and losing the rest of the file.
                        patient_id = (row.get('Patient ID') or '').strip()
                        payer_id = (row.get('Ins1 Payer ID') or '').strip()
                        if not patient_id or not payer_id:
                            continue

                        payer_to_patient_ids[payer_id].add(patient_id)
                        patient_count += 1

                if patient_count > 0:
                    MediLink_ConfigLoader.log("CSV file '{}' has {} Patient IDs with Payer IDs.".format(filename, patient_count), level="DEBUG")
                else:
                    MediLink_ConfigLoader.log("CSV file '{}' is empty or does not have valid Patient ID or Payer ID mappings.".format(filename), level="DEBUG")
            except Exception as e:
                # Best effort: report the bad file and carry on with the others.
                print("Error processing file {}: {}".format(filename, e))
                MediLink_ConfigLoader.log("Error processing file '{}': {}".format(filename, e), level="ERROR")
    except FileNotFoundError as e:
        print("Error: {}".format(e))
        # Also record the failure in the log so it is not visible on the console only.
        MediLink_ConfigLoader.log("Error: {}".format(e), level="ERROR")

    if not payer_to_patient_ids:
        print("No historical mappings were generated.")

    return dict(payer_to_patient_ids)
776
+
777
def capitalize_all_fields(csv_data):
    """
    Upper-case every field of every row, in place.

    Strings are upper-cased directly. datetime values and None are left untouched.
    Any other value is converted with str() and then upper-cased.

    Parameters:
        csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
        None: The function modifies the csv_data in place.
    """
    for record in csv_data:
        # Only values of existing keys are reassigned, so the dict size never changes mid-iteration.
        for field, content in record.items():
            if isinstance(content, str):
                record[field] = content.upper()
                continue
            if content is None or isinstance(content, datetime):
                continue
            record[field] = str(content).upper()