medicafe 0.250529.1__py3-none-any.whl → 0.250529.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of medicafe might be problematic. Click here for more details.

@@ -1,743 +1,743 @@
1
- #MediBot_Preprocessor_lib.py
2
- from collections import OrderedDict, defaultdict
3
- from datetime import datetime, timedelta
4
- import os, csv, sys
5
- import chardet # Ensure chardet is imported
6
-
7
- # Add the parent directory of the project to the Python path
8
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
-
10
- # Attempt to import necessary modules, falling back if they are not found
11
- try:
12
- import MediLink_ConfigLoader
13
- import MediLink_DataMgmt
14
- except ImportError:
15
- from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
16
-
17
- try:
18
- from MediBot_UI import app_control
19
- from MediBot_docx_decoder import parse_docx
20
- except ImportError:
21
- from MediBot import MediBot_UI
22
- app_control = MediBot_UI.app_control
23
- from MediBot import MediBot_docx_decoder
24
- parse_docx = MediBot_docx_decoder.parse_docx
25
-
26
class InitializationError(Exception):
    """Raised when the module cannot be initialized from the supplied config.

    The human-readable reason is available both as ``str(error)`` and as
    ``error.message``.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
30
-
31
def initialize(config):
    """
    Publishes the settings this module needs as module-level globals.

    For each of 'AHK_EXECUTABLE', 'CSV_FILE_PATH', 'field_mapping' and
    'page_end_markers' the value is read with config.get(), falling back to an
    empty default. 'field_mapping' is additionally wrapped in an OrderedDict.

    Parameters:
    config (dict): Configuration object; must provide a dict-style .get().

    Raises:
    InitializationError: If reading a setting raises AttributeError
                         (for example when config has no .get()).
    """
    global AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping, page_end_markers

    defaults = (
        ('AHK_EXECUTABLE', ""),
        ('CSV_FILE_PATH', ""),
        ('field_mapping', {}),
        ('page_end_markers', []),
    )

    module_namespace = globals()
    for name, fallback in defaults:
        try:
            value = config.get(name, fallback)
            if name == 'field_mapping':
                value = OrderedDict(value)
        except AttributeError:
            raise InitializationError("Error: '{}' not found in config.".format(name))
        module_namespace[name] = value
46
-
47
def open_csv_for_editing(csv_file_path):
    """
    Opens the CSV file with the application associated with it and tells the
    user to re-run MediBot after saving.

    Parameters:
    csv_file_path (str): Path of the CSV file to open.

    Returns:
    None: Failures are reported on stdout rather than raised.
    """
    try:
        if hasattr(os, 'startfile'):
            # os.startfile raises OSError on failure and needs no shell quoting,
            # so a path containing quotes cannot break the command line.
            os.startfile(csv_file_path)
        else:
            # Fallback keeps the original shell command. os.system does not raise,
            # so the exit status has to be checked explicitly.
            exit_status = os.system('start "" "{}"'.format(csv_file_path))
            if exit_status != 0:
                raise OSError("'start' exited with status {}".format(exit_status))
        print("After saving the revised CSV, please re-run MediBot.")
    except Exception as e:
        print("Failed to open CSV file:", e)
54
-
55
- # Function to clean the headers
56
def clean_header(headers):
    """
    Normalizes CSV header names and verifies that 'Surgery Date' is present.

    Each header is stripped of surrounding whitespace, then every character that
    is not alphanumeric, whitespace, a hyphen or an underscore is dropped.

    Parameters:
    headers (list of str): The original header strings.

    Returns:
    list of str: The cleaned header strings, in the original order.

    Raises:
    ValueError: If no cleaned header equals 'Surgery Date'.
    """
    def _is_allowed(char):
        return char.isalnum() or char.isspace() or char in ('-', '_')

    cleaned_headers = [''.join(filter(_is_allowed, header.strip())) for header in headers]

    # Log both versions so a mismatch can be diagnosed from the log alone.
    MediLink_ConfigLoader.log("Original headers: {}".format(headers), level="INFO")
    MediLink_ConfigLoader.log("Cleaned headers: {}".format(cleaned_headers), level="INFO")

    if 'Surgery Date' in cleaned_headers:
        return cleaned_headers

    # The rest of the pipeline keys off 'Surgery Date', so a missing column is fatal.
    MediLink_ConfigLoader.log("WARNING: 'Surgery Date' header not found after cleaning.", level="WARNING")
    print("WARNING: 'Surgery Date' header not found after cleaning.")
    raise ValueError("Error: 'Surgery Date' header not found after cleaning.")
86
-
87
- # Function to load and process CSV data
88
def load_csv_data(csv_file_path):
    """
    Loads a CSV file into a list of row dictionaries keyed by cleaned headers.

    The file's encoding is detected with chardet, the header row is normalized
    with clean_header(), and every data row is re-keyed by the cleaned header.

    Parameters:
    csv_file_path (str): Path of the CSV file to read.

    Returns:
    list of dict: One dictionary per data row.

    Raises:
    ValueError: Propagated from clean_header() when 'Surgery Date' is missing.

    Note:
    A missing file or any other IOError prints a message and terminates the
    process with sys.exit(1).
    """
    try:
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError("***Error: CSV file '{}' not found.".format(csv_file_path))

        # Detect the file encoding from the raw bytes
        with open(csv_file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']
        confidence = result['confidence']
        print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))

        # newline='' hands line-ending handling to the csv module, which is
        # required for quoted fields that contain embedded newlines.
        with open(csv_file_path, 'r', encoding=encoding, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            original_headers = reader.fieldnames
            cleaned_headers = clean_header(original_headers)

            # Pair each cleaned header with the original key DictReader uses.
            header_pairs = list(zip(cleaned_headers, original_headers))

            return [
                {cleaned: row[original] for cleaned, original in header_pairs}
                for row in reader
            ]
    except FileNotFoundError as e:
        print(e)  # Print the informative error message
        print("Hint: Check if CSV file is located in the expected directory or specify a different path in config file.")
        print("Please correct the issue and re-run MediBot.")
        sys.exit(1)  # Halt the script
    except IOError as e:
        print("Error reading CSV file: {}. Please check the file path and permissions.".format(e))
        sys.exit(1)  # Halt the script in case of other IO errors
126
-
127
- # CSV Pre-processor Helper functions
128
def add_columns(csv_data, column_headers):
    """
    Adds one or more empty columns to every row of the CSV data.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    column_headers (list of str or str): The column header(s) to add. An existing
        column with the same name is reset to an empty string.

    Returns:
    None: The function modifies the csv_data in place.

    Raises:
    ValueError: If column_headers is neither a list nor a string.
    """
    if isinstance(column_headers, str):
        headers = [column_headers]
    elif isinstance(column_headers, list):
        headers = column_headers
    else:
        raise ValueError("column_headers should be a list or a string")

    blank_columns = dict.fromkeys(headers, '')
    for row in csv_data:
        row.update(blank_columns)
147
-
148
- # Extracting the list to a variable for future refactoring:
149
def filter_rows(csv_data):
    """
    Drops rows that have no 'Patient ID' or whose 'Primary Insurance' is excluded.

    Parameters:
    csv_data (list of dict): The CSV data; filtered in place.

    Returns:
    None: The function modifies the csv_data in place.
    """
    # TODO: This should be handled in the crosswalk.
    excluded_insurance = {'AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'}

    def _is_billable(row):
        if not row.get('Patient ID'):
            return False
        return row.get('Primary Insurance') not in excluded_insurance

    # Slice assignment keeps the caller's list object alive.
    csv_data[:] = list(filter(_is_billable, csv_data))
153
-
154
def convert_surgery_date(csv_data):
    """
    Converts each row's 'Surgery Date' from an 'MM/DD/YYYY' string to a datetime.

    Missing, empty or unparseable dates are replaced with datetime.min, which the
    rest of this module treats as the "no valid date" sentinel. Values that are
    already datetime objects are left untouched, so calling this function more
    than once on the same data is safe.

    Parameters:
    csv_data (list of dict): The CSV data; modified in place.

    Returns:
    None: The function modifies the csv_data in place.
    """
    for row in csv_data:
        raw_value = row.get('Surgery Date', '')

        # Already converted by an earlier call; strptime would raise TypeError on it.
        if isinstance(raw_value, datetime):
            continue

        # Surrounding whitespace is the only clean-up applied to incoming strings.
        surgery_date_str = raw_value.strip() if isinstance(raw_value, str) else raw_value

        if not surgery_date_str:
            MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if empty
            print("Surgery Date not found for row: {}".format(row))
        else:
            try:
                row['Surgery Date'] = datetime.strptime(surgery_date_str, '%m/%d/%Y')
            except ValueError as e:
                MediLink_ConfigLoader.log("Error parsing Surgery Date '{}': {} for row: {}".format(surgery_date_str, e, row), level="ERROR")
                row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if parsing fails
168
-
169
def sort_and_deduplicate(csv_data):
    """
    Keeps one row per 'Patient ID' (the earliest 'Surgery Date') and sorts the result.

    Rows are ordered by 'Surgery Date' and then by the stripped 'Patient Last'
    value. 'Surgery Date' values must already be mutually comparable, e.g. the
    datetime objects produced by convert_surgery_date().

    Parameters:
    csv_data (list of dict): The CSV data; replaced in place.

    Returns:
    None: The function modifies the csv_data in place.
    """
    earliest_by_patient = {}

    for row in csv_data:
        patient_id = row.get('Patient ID')
        kept = earliest_by_patient.get(patient_id)
        # First sighting wins outright; afterwards only a strictly earlier date replaces it.
        if kept is None or row['Surgery Date'] < kept['Surgery Date']:
            earliest_by_patient[patient_id] = row

    def _sort_key(row):
        return (row['Surgery Date'], row.get('Patient Last', '').strip())

    deduplicated = list(earliest_by_patient.values())
    deduplicated.sort(key=_sort_key)  # TODO Does this need to be sorted twice? once before and once after?
    csv_data[:] = deduplicated
186
-
187
- # TODO: Consider adding an option in the config to sort based on Surgery Schedules when available.
188
- # If no schedule is available, the current sorting strategy will be used.
189
-
190
def combine_fields(csv_data):
    """
    Builds the combined 'Patient Name' and 'Patient Street' fields and renders
    'Surgery Date' back to an 'MM/DD/YYYY' string.

    'Patient Name' is "Last, First, M": multi-word first and last names are joined
    with underscores and the middle name is reduced to its first character. Empty
    parts are omitted. 'Patient Street' is address line 1 and 2 joined by a space.

    Parameters:
    csv_data (list of dict): The CSV data; modified in place. 'Surgery Date' is
        expected to be a datetime (or a falsy value) at this point.

    Returns:
    None: The function modifies the csv_data in place.
    """
    for row in csv_data:
        # Safely handle the 'Surgery Date' conversion
        surgery_date = row.get('Surgery Date')
        row['Surgery Date'] = surgery_date.strftime('%m/%d/%Y') if surgery_date else ''

        # str.split() with no argument already trims and collapses whitespace.
        first_name = '_'.join(row.get('Patient First', '').split())
        # Slicing keeps a one-letter middle initial; the previous "len > 1" test
        # silently discarded it.
        middle_initial = row.get('Patient Middle', '').strip()[:1]
        last_name = '_'.join(row.get('Patient Last', '').split())
        row['Patient Name'] = ', '.join(filter(None, [last_name, first_name, middle_initial]))  # Join non-empty parts

        address1 = row.get('Patient Address1', '').strip()
        address2 = row.get('Patient Address2', '').strip()
        row['Patient Street'] = ' '.join(filter(None, [address1, address2]))  # Join non-empty addresses
205
-
206
def apply_replacements(csv_data, crosswalk):
    """
    Applies the crosswalk's 'csv_replacements' value substitutions to each row.

    For every (old, new) pair, the first of 'Patient SSN', 'Primary Insurance'
    and 'Ins1 Payer ID' whose value equals old is set to new. At most one field
    is changed per pair per row.

    Parameters:
    csv_data (list of dict): The CSV data; modified in place.
    crosswalk (dict): Crosswalk data; 'csv_replacements' maps old values to new values.

    Returns:
    None: The function modifies the csv_data in place.
    """
    candidate_fields = ('Patient SSN', 'Primary Insurance', 'Ins1 Payer ID')
    substitutions = crosswalk.get('csv_replacements', {})

    for row in csv_data:
        for old_value, new_value in substitutions.items():
            # Only the first matching field is rewritten for this substitution.
            target = next((field for field in candidate_fields if row.get(field) == old_value), None)
            if target is not None:
                row[target] = new_value
214
-
215
- import difflib
216
- from collections import defaultdict
217
-
218
def find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names):
    """
    Picks the Medisoft ID whose MAINS name is most similar to an insurance name.

    Names are compared with difflib.SequenceMatcher after removing digits and
    upper-casing. No minimum similarity is enforced; any ratio above zero can win,
    and on a tie the first candidate encountered is kept.

    Parameters:
    - insurance_name (str): The insurance name from the CSV row.
    - medisoft_ids (list): Candidate Medisoft IDs associated with the Payer ID.
    - medisoft_to_mains_names (dict): Mapping from Medisoft ID to list of MAINS names.

    Returns:
    - The winning element of medisoft_ids (returned as-is, not converted), or
      None if no candidate produced a ratio above zero.
    """
    def _normalize(name):
        # Drop digits and compare case-insensitively.
        return ''.join(ch for ch in name if not ch.isdigit()).upper()

    top_ratio = 0
    top_id = None

    for candidate_id in medisoft_ids:
        for mains_name in medisoft_to_mains_names.get(candidate_id, []):
            processed_mains = _normalize(mains_name)
            processed_insurance = _normalize(insurance_name)

            MediLink_ConfigLoader.log("Processing Medisoft ID '{}': Comparing processed insurance '{}' with processed mains '{}'.".format(candidate_id, processed_insurance, processed_mains), level="DEBUG")

            ratio = difflib.SequenceMatcher(None, processed_insurance, processed_mains).ratio()

            MediLink_ConfigLoader.log("Match ratio for Medisoft ID '{}': {:.2f}".format(candidate_id, ratio), level="DEBUG")

            if ratio > top_ratio:
                top_ratio = ratio
                top_id = candidate_id
                MediLink_ConfigLoader.log("New best match found: Medisoft ID '{}' with match ratio {:.2f}".format(top_id, top_ratio), level="DEBUG")

    MediLink_ConfigLoader.log("Final best match ratio: {:.2f} for Medisoft ID '{}'".format(top_ratio, top_id), level="DEBUG")

    # No threshold applied, return the best match found
    return top_id
260
-
261
def NEW_update_insurance_ids(csv_data, config, crosswalk):
    """
    Updates the 'Ins1 Insurance ID' field in each row of csv_data based on the crosswalk and MAINS data.

    For each row with a non-empty 'Ins1 Payer ID':
    - no Medisoft ID in the crosswalk: a placeholder crosswalk entry is created
      (an existing entry is left untouched) and the row is skipped;
    - exactly one Medisoft ID: it is converted to int and assigned;
    - several Medisoft IDs: the one whose MAINS name best matches the row's
      'Primary Insurance' is assigned, falling back to the first ID.

    Parameters:
    - csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    - config (dict): Configuration object containing necessary paths and parameters.
    - crosswalk (dict): Crosswalk data containing mappings between Payer IDs and Medisoft IDs.
      May gain placeholder entries under 'payer_id'.

    Returns:
    - None: The function modifies the csv_data in place.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; confirm that the crosswalk lookup is meant to run for
    # every row with a payer ID rather than only the first time a payer ID is seen.
    processed_payer_ids = set()  # Track processed Payer IDs (used for logging only)
    MediLink_ConfigLoader.log("Starting update of insurance IDs.", level="INFO")

    # Load MAINS data to get mapping from insurance names to Medisoft IDs
    insurance_to_id = load_insurance_data_from_mains(config)
    MediLink_ConfigLoader.log("Loaded MAINS data for insurance to ID mapping.", level="DEBUG")

    # Invert the mapping to get Medisoft ID to MAINS names
    medisoft_to_mains_names = defaultdict(list)
    for insurance_name, medisoft_id in insurance_to_id.items():
        medisoft_to_mains_names[medisoft_id].append(insurance_name)

    # enumerate supplies the row number directly; csv_data.index(row) was a linear
    # scan per row and reported the wrong number when two rows compared equal.
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row with Ins1 Payer ID: '{}'.".format(ins1_payer_id), level="DEBUG")

        if not ins1_payer_id:
            continue

        if ins1_payer_id not in processed_payer_ids:
            processed_payer_ids.add(ins1_payer_id)
            MediLink_ConfigLoader.log("Marked Payer ID '{}' as processed.".format(ins1_payer_id), level="DEBUG")

        # Retrieve Medisoft IDs for the current Payer ID
        medisoft_ids = crosswalk.get('payer_id', {}).get(ins1_payer_id, {}).get('medisoft_id', [])
        MediLink_ConfigLoader.log("Retrieved Medisoft IDs for Payer ID '{}': {}".format(ins1_payer_id, medisoft_ids), level="DEBUG")

        if not medisoft_ids:
            MediLink_ConfigLoader.log("No Medisoft IDs available for Payer ID '{}', creating placeholder entry.".format(ins1_payer_id), level="WARNING")
            # setdefault only creates the placeholder when the payer is absent, so an
            # existing entry with an empty ID list keeps its endpoint and other data.
            crosswalk.setdefault('payer_id', {}).setdefault(ins1_payer_id, {
                'medisoft_id': [],           # Placeholder for future Medisoft IDs
                'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                'endpoint': None             # Placeholder for future endpoint
            })
            continue  # Skip further processing for this Payer ID

        # If only one Medisoft ID is associated, assign it directly
        if len(medisoft_ids) == 1:
            try:
                medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = medisoft_id
                MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, row_number, ins1_payer_id), level="DEBUG")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
            continue  # Move to the next row

        # If multiple Medisoft IDs are associated, perform fuzzy matching
        insurance_name = row.get('Primary Insurance', '').strip()
        if not insurance_name:
            MediLink_ConfigLoader.log("Row with Payer ID '{}' missing 'Primary Insurance', skipping assignment.".format(ins1_payer_id), level="WARNING")
            continue  # Skip if insurance name is missing

        best_medisoft_id = find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names)

        if best_medisoft_id:
            # NOTE(review): unlike the other branches this value is not passed through
            # int(); confirm whether crosswalk IDs and MAINS IDs share a type.
            row['Ins1 Insurance ID'] = best_medisoft_id
            MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Payer ID '{}' based on fuzzy match.".format(best_medisoft_id, ins1_payer_id), level="INFO")
        else:
            # Default to the first Medisoft ID if no good match is found
            try:
                default_medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = default_medisoft_id
                MediLink_ConfigLoader.log("No suitable match found. Defaulted to Medisoft ID '{}' for Payer ID '{}'.".format(default_medisoft_id, ins1_payer_id), level="INFO")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting default Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
342
-
343
def update_insurance_ids(csv_data, config, crosswalk):
    """
    Assigns 'Ins1 Insurance ID' to each row from the first Medisoft ID the
    crosswalk lists for the row's 'Ins1 Payer ID'.

    Payer IDs that are not in the crosswalk get a placeholder crosswalk entry and
    the row's 'Ins1 Insurance ID' is set to None. A Medisoft ID that cannot be
    converted to int is logged and treated as missing.

    Parameters:
    - csv_data (list of dict): The CSV data; modified in place.
    - config (dict): Configuration object (not used by this function).
    - crosswalk (dict): Crosswalk data; may gain placeholder entries under 'payer_id'.

    Returns:
    - None: The function modifies the csv_data in place.
    """
    MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")

    # Create a dictionary to hold the Medisoft ID for each payer ID in the crosswalk
    payer_id_to_medisoft = {}
    MediLink_ConfigLoader.log("Initialized payer_id_to_medisoft dictionary.", level="DEBUG")

    # Populate the dictionary with data from the crosswalk
    for payer_id, details in crosswalk.get('payer_id', {}).items():
        # Filter out empty strings and take the first valid ID
        medisoft_ids = [medisoft_id for medisoft_id in details.get('medisoft_id', []) if medisoft_id]
        resolved_id = None
        if medisoft_ids:
            try:
                resolved_id = int(medisoft_ids[0])
            except ValueError as e:
                # One malformed crosswalk entry should not abort the whole run.
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], payer_id, e), level="ERROR")
        payer_id_to_medisoft[payer_id] = resolved_id
        MediLink_ConfigLoader.log("Processed Payer ID '{}': Medisoft IDs found: {}".format(payer_id, medisoft_ids), level="DEBUG")

    # Process the csv_data. enumerate supplies the row number directly;
    # csv_data.index(row) was a linear scan per row.
    for row_number, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_number, ins1_payer_id), level="DEBUG")

        if ins1_payer_id not in payer_id_to_medisoft:
            # Add placeholder entry for new payer ID
            payer_id_to_medisoft[ins1_payer_id] = None  # No Medisoft ID available
            crosswalk.setdefault('payer_id', {})[ins1_payer_id] = {
                'medisoft_id': [],           # Placeholder for future Medisoft IDs
                'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                'endpoint': None             # Placeholder for future endpoint
            }
            MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")

        # Assign the Medisoft ID to the row
        row['Ins1 Insurance ID'] = payer_id_to_medisoft[ins1_payer_id]
        MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Ins1 Payer ID '{}'.".format(row['Ins1 Insurance ID'], ins1_payer_id), level="DEBUG")
376
-
377
def update_procedure_codes(csv_data, crosswalk):
    """
    Fills 'Procedure Code' on every row from its 'Default Diagnosis #1' value.

    The Medisoft shorthand in 'Default Diagnosis #1' is mapped back to a diagnosis
    code via the reversed 'diagnosis_to_medisoft' crosswalk table, and the
    diagnosis code is mapped to a procedure code via 'procedure_to_diagnosis'.
    Rows that cannot be resolved get the procedure code "Unknown".

    Parameters:
    - csv_data (list of dict): The CSV data; modified in place.
    - crosswalk (dict): Crosswalk data with 'diagnosis_to_medisoft' and
      'procedure_to_diagnosis' tables.

    Returns:
    - bool: Always True.
    """
    # Reverse lookup: Medisoft shorthand -> diagnosis code.
    # BUG We need to be careful here in case we decide we need to change the crosswalk data specifically with regard to the T8/H usage.
    shorthand_to_diagnosis = {
        shorthand: diagnosis
        for diagnosis, shorthand in crosswalk.get('diagnosis_to_medisoft', {}).items()
    }

    # Reverse lookup: diagnosis code -> procedure code.
    diagnosis_to_procedure = {}
    for procedure_code, diagnosis_codes in crosswalk.get('procedure_to_diagnosis', {}).items():
        for diagnosis_code in diagnosis_codes:
            diagnosis_to_procedure[diagnosis_code] = procedure_code

    updated_count = 0

    for row_num, row in enumerate(csv_data, start=1):
        try:
            medisoft_code = row.get('Default Diagnosis #1', '').strip()
            diagnosis_code = shorthand_to_diagnosis.get(medisoft_code)
            resolved = diagnosis_to_procedure.get(diagnosis_code) if diagnosis_code else None
            if resolved:
                row['Procedure Code'] = resolved
                updated_count += 1
            else:
                row['Procedure Code'] = "Unknown"  # Or handle as appropriate
        except Exception as e:
            MediLink_ConfigLoader.log("In update_procedure_codes, Error processing row {}: {}".format(row_num, e), level="ERROR")

    MediLink_ConfigLoader.log("Total {} 'Procedure Code' rows updated.".format(updated_count), level="INFO")

    return True
414
-
415
def _default_medisoft_shorthand(diagnosis_code):
    """Derives a fallback Medisoft shorthand for a diagnosis code absent from the crosswalk."""
    shorthand = diagnosis_code
    # Remove the literal prefixes. str.lstrip('T8') would strip ANY run of leading
    # 'T'/'8' characters (e.g. turning '81.01' into '1.01'), which is not the intent.
    for prefix in ('H', 'T8'):
        if shorthand.startswith(prefix):
            shorthand = shorthand[len(prefix):]
    return shorthand.replace('.', '')[-5:]


def update_diagnosis_codes(csv_data):
    """
    Updates 'Default Diagnosis #1' on each row from diagnosis data parsed out of
    DOCX files in the configured local storage directory.

    Steps:
    1. Convert the rows' 'Surgery Date' values to datetime (left converted afterwards).
    2. Select DOCX files whose modification time lies within 8 days of the
       earliest/latest surgery date and parse them with parse_docx().
    3. For each row whose Patient ID and surgery date ('MM-DD-YYYY') appear in the
       parsed data, store the Medisoft shorthand for the diagnosis code, taken from
       the crosswalk or derived from the code itself when the crosswalk has no entry.

    Parameters:
    - csv_data (list of dict): The CSV data; modified in place.

    Returns:
    - None. Any exception is logged and printed rather than raised.
    """
    try:
        # Load configuration and crosswalk
        config, crosswalk = MediLink_ConfigLoader.load_configuration()

        # Extract the local storage path from the configuration
        local_storage_path = config['MediLink_Config']['local_storage_path']

        # Diagnosis data from all DOCX files: patient ID -> {date of service -> data}
        all_patient_data = {}

        # Convert surgery dates in CSV data
        convert_surgery_date(csv_data)

        # datetime.min marks rows whose date was missing or unparseable
        surgery_dates = [row['Surgery Date'] for row in csv_data if row['Surgery Date'] != datetime.min]

        if not surgery_dates:
            raise ValueError("No valid surgery dates found in csv_data.")

        # Apply a ±8-day margin to the surgery dates... Increased from 5 days.
        margin = timedelta(days=8)
        threshold_start = min(surgery_dates) - margin
        threshold_end = max(surgery_dates) + margin

        # TODO (Low) This is a bad idea. The file modification date is a poor proxy for the
        # surgery date contained inside the DOCX file: if the DOCX files are downloaded
        # "too late" they are presumed out of range. Extracting the surgery date from the
        # DOCX itself is computationally expensive, so a smarter approach is needed.
        MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")

        # Gather all relevant DOCX files in the specified directory
        docx_files = [
            os.path.join(local_storage_path, filename)
            for filename in os.listdir(local_storage_path)
            if filename.endswith(".docx")
        ]

        # Filter files based on modification time
        valid_files = [
            filepath for filepath in docx_files
            if threshold_start <= datetime.fromtimestamp(os.path.getmtime(filepath)) <= threshold_end
        ]

        # Process valid DOCX files; one unreadable file must not stop the others
        for filepath in valid_files:
            MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
            try:
                patient_data = parse_docx(filepath, surgery_dates)  # Pass surgery_dates to parse_docx
                for patient_id, service_dates in patient_data.items():
                    patient_entry = all_patient_data.setdefault(patient_id, {})
                    for date_of_service, diagnosis_data in service_dates.items():
                        patient_entry[date_of_service] = diagnosis_data
            except Exception as e:
                MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")

        # Log if no valid files were found
        if not valid_files:
            MediLink_ConfigLoader.log("No valid DOCX files found within the modification time threshold.", level="INFO")

        # Debug logging for all_patient_data
        MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")

        # Extract patient IDs from csv_data for efficient matching
        patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}

        # Check if any patient data was collected
        if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
            MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
            return  # Exit the function early if no data is available

        # Get Medisoft shorthand dictionary from crosswalk.
        diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})

        # Initialize counter for updated rows
        updated_count = 0

        # Update the "Default Diagnosis #1" column in the CSV data
        for row_num, row in enumerate(csv_data, start=1):
            patient_id = row.get('Patient ID', '').strip()

            MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
            surgery_date = row.get('Surgery Date', '')

            # Convert surgery_date to the string format used as the lookup key
            if surgery_date != datetime.min:
                surgery_date_str = surgery_date.strftime("%m-%d-%Y")
            else:
                surgery_date_str = ''

            MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

            if patient_id not in all_patient_data:
                MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")
                continue

            if surgery_date_str not in all_patient_data[patient_id]:
                MediLink_ConfigLoader.log("No matching surgery date found for Patient ID: {} in row {}.".format(patient_id, row_num), level="INFO")
                continue

            # Each entry unpacks to (diagnosis code, eye, femto flag); only the code is used here.
            diagnosis_code, _left_or_right_eye, _femto_yes_or_no = all_patient_data[patient_id][surgery_date_str]
            MediLink_ConfigLoader.log("Found diagnosis data for Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

            # Convert diagnosis code to Medisoft shorthand format.
            medisoft_shorthand = diagnosis_to_medisoft.get(diagnosis_code, None)
            if medisoft_shorthand is None and diagnosis_code:
                medisoft_shorthand = _default_medisoft_shorthand(diagnosis_code)
            MediLink_ConfigLoader.log("Converted diagnosis code to Medisoft shorthand: {}".format(medisoft_shorthand), level="DEBUG")

            row['Default Diagnosis #1'] = medisoft_shorthand
            updated_count += 1
            MediLink_ConfigLoader.log("Updated row number {} with new diagnosis code.".format(row_num), level="INFO")

        # Log total count of updated rows
        MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")

    except Exception as e:
        message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
        MediLink_ConfigLoader.log(message, level="ERROR")
        print(message)
542
-
543
def load_data_sources(config, crosswalk):
    """
    Loads the two historical mappings needed to build the crosswalk.

    Parameters:
    - config (dict): Configuration object passed through to the loaders.
    - crosswalk (dict): Crosswalk data passed through to the MAPAT loader.

    Returns:
    - tuple: (patient ID -> insurance ID mapping from MAPAT,
              payer ID -> patient IDs mapping from Carol's CSVs).

    Raises:
    - ValueError: If either mapping comes back empty.
    """
    mapat_mapping = load_insurance_data_from_mapat(config, crosswalk)
    if not mapat_mapping:
        raise ValueError("Failed to load historical Patient ID to Insurance ID mappings from MAPAT.")

    carol_mapping = load_historical_payer_to_patient_mappings(config)
    if not carol_mapping:
        raise ValueError("Failed to load historical Carol's CSVs.")

    return mapat_mapping, carol_mapping
554
-
555
def map_payer_ids_to_insurance_ids(patient_id_to_insurance_id, payer_id_to_patient_ids):
    """
    Maps Payer IDs to Insurance IDs based on the historical mappings.

    Parameters:
    - patient_id_to_insurance_id (dict): Patient ID -> Medisoft insurance ID.
    - payer_id_to_patient_ids (dict): Payer ID -> iterable of Patient IDs.

    Returns:
    - dict: Payer ID -> {'endpoint', 'medisoft_id', 'medisoft_medicare_id'}.
      Payer IDs for which no patient has a known insurance ID are omitted.
      'medisoft_id' is a de-duplicated list in a stable (string-sorted) order.
    """
    payer_id_to_details = {}
    for payer_id, patient_ids in payer_id_to_patient_ids.items():
        medisoft_ids = set()
        for patient_id in patient_ids:
            if patient_id in patient_id_to_insurance_id:
                medisoft_id = patient_id_to_insurance_id[patient_id]
                medisoft_ids.add(medisoft_id)
                MediLink_ConfigLoader.log("Added Medisoft ID {} for Patient ID {} and Payer ID {}".format(medisoft_id, patient_id, payer_id))
            else:
                MediLink_ConfigLoader.log("No matching Insurance ID found for Patient ID {}".format(patient_id))
        if medisoft_ids:
            payer_id_to_details[payer_id] = {
                "endpoint": "OPTUMEDI",  # TODO Default, to be refined via API poll. There are 2 of these defaults!
                # Set iteration order is arbitrary; sorting makes the list (and
                # therefore its first element) reproducible between runs.
                "medisoft_id": sorted(medisoft_ids, key=str),
                "medisoft_medicare_id": []  # Placeholder for future implementation
            }
    return payer_id_to_details
574
-
575
def load_insurance_data_from_mains(config):
    """
    Builds a mapping from MAINS insurance names to insurance IDs.

    The MAINS file is read as fixed-width data; each record's 'MAINSNAME' is
    mapped to the line number reported by the reader, which is used as the
    insurance ID. When a name occurs more than once, the last occurrence wins.

    Args:
        config (dict): Ignored; the full configuration is reloaded so that the
            MediLink config key subset is never used by accident.

    Returns:
        dict: A dictionary mapping insurance names to insurance IDs.
    """
    # Reset config pull to make sure it is not using the MediLink config key subset
    config, crosswalk = MediLink_ConfigLoader.load_configuration()

    # TODO (Low) For secondary insurance, this needs to be pulling from the correct MAINS (there are 2)
    # TODO (Low) Performance: There probably needs to be a dictionary proxy for MAINS that gets updated.
    # Meh, this just has to be part of the new architecture plan where we make Medisoft a downstream
    # recipient from the db.
    # TODO (High) The Medisoft Medicare flag needs to be brought in here.
    mains_path = config['MAINS_MED_PATH']
    mains_slices = crosswalk['mains_mapping']['slices']

    # Assuming line_number gives the correct insurance ID without needing adjustment
    fixed_width_records = MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices)
    return {record['MAINSNAME']: line_number for record, line_number in fixed_width_records}
608
-
609
def load_insurance_data_from_mapat(config, crosswalk):
    """
    Builds a mapping from patient ID to insurance ID out of the MAPAT file.

    Args:
        config (dict): Configuration object (not read by this function; the MAPAT
            path comes from app_control).
        crosswalk (dict): Crosswalk data; 'mapat_mapping' -> 'slices' defines the
            fixed-width field layout.

    Returns:
        dict: A dictionary mapping patient IDs ('MAPATPXID') to insurance IDs
        ('MAPATINID'). When a patient occurs more than once, the last record wins.
    """
    mapat_path = app_control.get_mapat_med_path()
    mapat_slices = crosswalk['mapat_mapping']['slices']

    # The reader yields (record, line_number); the line number is not needed here.
    fixed_width_records = MediLink_DataMgmt.read_general_fixed_width_data(mapat_path, mapat_slices)
    return {record['MAPATPXID']: record['MAPATINID'] for record, _ in fixed_width_records}
634
-
635
def parse_z_dat(z_dat_path, config):  # Why is this in MediBot and not MediLink?
    """
    Reads a Z.dat fixed-width file and maps each Patient ID to its Insurance Name.

    Args:
        z_dat_path (str): Path to the Z.dat file.
        config (dict): Configuration object; its 'MediLink_Config' section (or the
            object itself when that key is absent) supplies the slicing information.

    Returns:
        dict: A dictionary mapping Patient IDs to Insurance Names. Records lacking
        either value are skipped. On any read or parse failure the mappings
        collected so far are returned and the failure is logged.
    """
    insurance_name_by_patient = {}

    try:
        # Each record is a block of five fixed-width lines
        for record_lines in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
            personal_info, insurance_info, service_info, service_info_2, service_info_3 = record_lines

            parsed_data = MediLink_DataMgmt.parse_fixed_width_data(
                personal_info,
                insurance_info,
                service_info,
                service_info_2,
                service_info_3,
                config.get('MediLink_Config', config),
            )

            patient_id = parsed_data.get('PATID')
            insurance_name = parsed_data.get('INAME')
            if not (patient_id and insurance_name):
                continue

            insurance_name_by_patient[patient_id] = insurance_name
            MediLink_ConfigLoader.log("Mapped Patient ID {} to Insurance Name {}".format(patient_id, insurance_name), config, level="INFO")

    except FileNotFoundError:
        MediLink_ConfigLoader.log("File not found: {}".format(z_dat_path), config, level="INFO")
    except Exception as e:
        MediLink_ConfigLoader.log("Failed to parse Z.dat: {}".format(str(e)), config, level="INFO")

    return insurance_name_by_patient
668
-
669
def load_historical_payer_to_patient_mappings(config):
    """
    Loads historical mappings from multiple Carol's CSV files in a specified directory,
    mapping Payer IDs to sets of Patient IDs.

    Every '*.csv' file (extension matched case-insensitively) in the directory that
    contains config['CSV_FILE_PATH'] is read. A row contributes a mapping only when both
    its 'Patient ID' and 'Ins1 Payer ID' cells are present and non-blank.

    Args:
        config (dict): Configuration object; only 'CSV_FILE_PATH' is read, and only its
            directory part is used.

    Returns:
        dict: A dictionary where each key is a Payer ID and the value is a set of Patient IDs.
            Empty when the directory is missing or no file yields a valid row.
    """
    directory_path = os.path.dirname(config['CSV_FILE_PATH'])
    payer_to_patient_ids = defaultdict(set)

    try:
        # Check if the directory exists
        if not os.path.isdir(directory_path):
            raise FileNotFoundError("Directory '{}' not found.".format(directory_path))

        # Loop through each file in the directory containing Carol's historical CSVs
        for filename in os.listdir(directory_path):
            # Case-insensitive so that 'EXPORT.CSV' is not silently ignored.
            if not filename.lower().endswith('.csv'):
                continue

            file_path = os.path.join(directory_path, filename)
            try:
                patient_count = 0  # Counter for Patient IDs found in this CSV
                # 'utf-8-sig' reads UTF-8 with or without a BOM, so a BOM cannot hide the
                # first header; newline='' is what the csv module expects for file objects.
                with open(file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
                    for row in csv.DictReader(csvfile):
                        # DictReader fills missing cells of short rows with None, so
                        # coalesce before stripping; one bad row must not abort the file.
                        patient_id = (row.get('Patient ID') or '').strip()
                        payer_id = (row.get('Ins1 Payer ID') or '').strip()
                        if not patient_id or not payer_id:
                            continue  # Skip rows where either value is missing or empty

                        payer_to_patient_ids[payer_id].add(patient_id)
                        patient_count += 1  # Increment the counter for each valid mapping

                # Log the accumulated count for this CSV file
                if patient_count > 0:
                    MediLink_ConfigLoader.log("CSV file '{}' has {} Patient IDs with Payer IDs.".format(filename, patient_count), level="DEBUG")
                else:
                    MediLink_ConfigLoader.log("CSV file '{}' is empty or does not have valid Patient ID or Payer ID mappings.".format(filename), level="DEBUG")
            except Exception as e:
                # Best-effort per file: report the problem and continue with the next CSV.
                print("Error processing file {}: {}".format(filename, e))
                MediLink_ConfigLoader.log("Error processing file '{}': {}".format(filename, e), level="ERROR")
    except FileNotFoundError as e:
        print("Error: {}".format(e))

    if not payer_to_patient_ids:
        print("No historical mappings were generated.")

    return dict(payer_to_patient_ids)
723
-
724
- def capitalize_all_fields(csv_data):
725
- """
726
- Converts all text fields in the CSV data to uppercase.
727
-
728
- Parameters:
729
- csv_data (list of dict): The CSV data where each row is represented as a dictionary.
730
-
731
- Returns:
732
- None: The function modifies the csv_data in place.
733
- """
734
- for row in csv_data:
735
- for key, value in row.items():
736
- if isinstance(value, str):
737
- row[key] = value.upper()
738
- elif isinstance(value, datetime):
739
- # Keep datetime objects as they are
740
- pass
741
- elif value is not None:
742
- # Convert any other non-None values to string and then uppercase
1
+ #MediBot_Preprocessor_lib.py
2
+ from collections import OrderedDict, defaultdict
3
+ from datetime import datetime, timedelta
4
+ import os, csv, sys
5
+ import chardet # Ensure chardet is imported
6
+
7
+ # Add the parent directory of the project to the Python path
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
+
10
+ # Attempt to import necessary modules, falling back if they are not found
11
+ try:
12
+ import MediLink_ConfigLoader
13
+ import MediLink_DataMgmt
14
+ except ImportError:
15
+ from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
16
+
17
+ try:
18
+ from MediBot_UI import app_control
19
+ from MediBot_docx_decoder import parse_docx
20
+ except ImportError:
21
+ from MediBot import MediBot_UI
22
+ app_control = MediBot_UI.app_control
23
+ from MediBot import MediBot_docx_decoder
24
+ parse_docx = MediBot_docx_decoder.parse_docx
25
+
26
class InitializationError(Exception):
    """Error carrying a human-readable message; raised by initialize() on a bad config."""

    def __init__(self, message):
        # Hand the text to Exception first (it ends up in args / str()), then mirror
        # it on .message for callers that read the attribute directly.
        super().__init__(message)
        self.message = message
30
+
31
def initialize(config):
    """
    Publishes selected configuration values as module-level globals.

    Sets AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping and page_end_markers from
    config, falling back to an empty string / dict / list when a key is absent.
    field_mapping is always stored as an OrderedDict.

    Parameters:
    config: Object exposing a dict-style get(key, default) method.

    Raises:
    InitializationError: If reading a setting raises AttributeError (for example
        when config is None and therefore has no get method).
    """
    global AHK_EXECUTABLE, CSV_FILE_PATH, field_mapping, page_end_markers

    fallback_values = {
        'AHK_EXECUTABLE': "",
        'CSV_FILE_PATH': "",
        'field_mapping': {},
        'page_end_markers': []
    }
    module_namespace = globals()

    for setting_name, fallback in fallback_values.items():
        try:
            setting_value = config.get(setting_name, fallback)
            if setting_name == 'field_mapping':
                # Stored as an OrderedDict regardless of the mapping type in config.
                setting_value = OrderedDict(setting_value)
            module_namespace[setting_name] = setting_value
        except AttributeError:
            raise InitializationError("Error: '{}' not found in config.".format(setting_name))
46
+
47
def open_csv_for_editing(csv_file_path):
    """
    Opens the CSV file with its associated application so the user can revise it.

    Parameters:
    csv_file_path (str): Path of the CSV file to open.

    Returns:
    None: Prints either the follow-up instruction or a failure message; failures
        are reported to the console and never raised to the caller.
    """
    try:
        if hasattr(os, 'startfile'):
            # os.startfile exists only on Windows. It behaves like the shell's
            # `start` command but takes the path as-is (no command-line quoting)
            # and raises OSError when the file cannot be opened.
            os.startfile(csv_file_path)
        else:
            # Fallback: keep the original shell command, but check its exit status.
            # os.system does not raise on failure, so without this check a failed
            # launch would still be reported as a success.
            exit_status = os.system('start "" "{}"'.format(csv_file_path))
            if exit_status != 0:
                raise OSError("'start' command exited with status {}".format(exit_status))
        print("After saving the revised CSV, please re-run MediBot.")
    except Exception as e:
        print("Failed to open CSV file:", e)
54
+
55
+ # Function to clean the headers
56
def clean_header(headers):
    """
    Normalizes CSV header strings and verifies that 'Surgery Date' is among them.

    Each header is stripped of surrounding whitespace, then every character that is
    not alphanumeric, whitespace, a hyphen or an underscore is dropped.

    Parameters:
    headers (list of str): The original header strings.

    Returns:
    list of str: The cleaned header strings, in the same order as the input.

    Raises:
    ValueError: If no cleaned header equals 'Surgery Date'.
    """
    def _scrub(raw_header):
        trimmed = raw_header.strip()
        return ''.join(ch for ch in trimmed if ch.isalnum() or ch.isspace() or ch in ('-', '_'))

    cleaned_headers = [_scrub(raw_header) for raw_header in headers]

    # Record both versions so header problems can be diagnosed from the log.
    MediLink_ConfigLoader.log("Original headers: {}".format(headers), level="INFO")
    MediLink_ConfigLoader.log("Cleaned headers: {}".format(cleaned_headers), level="INFO")

    if 'Surgery Date' in cleaned_headers:
        return cleaned_headers

    # The required column is missing: log it, tell the user, then fail.
    MediLink_ConfigLoader.log("WARNING: 'Surgery Date' header not found after cleaning.", level="WARNING")
    print("WARNING: 'Surgery Date' header not found after cleaning.")
    raise ValueError("Error: 'Surgery Date' header not found after cleaning.")
86
+
87
+ # Function to load and process CSV data
88
def load_csv_data(csv_file_path):
    """
    Loads a CSV file into a list of row dictionaries keyed by cleaned header names.

    The file encoding is detected with chardet, the header row is normalized through
    clean_header(), and every data row is re-keyed from the original header to the
    cleaned one.

    Parameters:
    csv_file_path (str): Path of the CSV file to read.

    Returns:
    list of dict: One dictionary per data row, keyed by cleaned header.

    Raises:
    ValueError: Propagated from clean_header() when 'Surgery Date' is missing.

    Note:
    A missing file or any other I/O error prints a message and terminates the
    process via sys.exit(1) instead of raising.
    """
    try:
        # Check if the file exists
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError("***Error: CSV file '{}' not found.".format(csv_file_path))

        # Detect the file encoding
        with open(csv_file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']
        confidence = result['confidence']
        print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))

        # Read the CSV file with the detected encoding. newline='' is what the csv
        # module requires for file objects; without it, line breaks embedded in quoted
        # fields are not interpreted correctly.
        with open(csv_file_path, 'r', encoding=encoding, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            original_headers = reader.fieldnames
            cleaned_headers = clean_header(original_headers)

            # Pair each cleaned header with the original header it came from. If two
            # originals clean to the same name, the later column's value wins.
            header_pairs = list(zip(cleaned_headers, original_headers))

            csv_data = [
                {cleaned: row[original] for cleaned, original in header_pairs}
                for row in reader
            ]

        return csv_data  # Return a list of dictionaries
    except FileNotFoundError as e:
        print(e)  # Print the informative error message
        print("Hint: Check if CSV file is located in the expected directory or specify a different path in config file.")
        print("Please correct the issue and re-run MediBot.")
        sys.exit(1)  # Halt the script
    except IOError as e:
        print("Error reading CSV file: {}. Please check the file path and permissions.".format(e))
        sys.exit(1)  # Halt the script in case of other IO errors
126
+
127
+ # CSV Pre-processor Helper functions
128
def add_columns(csv_data, column_headers):
    """
    Adds blank columns to every row of the CSV data.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    column_headers (list of str or str): The header(s) to add. A single string is treated as a one-item list.

    Returns:
    None: The rows are modified in place; a header that already exists is reset to ''.

    Raises:
    ValueError: If column_headers is neither a list nor a string.
    """
    if isinstance(column_headers, str):
        new_columns = [column_headers]
    elif isinstance(column_headers, list):
        new_columns = column_headers
    else:
        raise ValueError("column_headers should be a list or a string")

    for record in csv_data:
        # Every requested column starts out as an empty string.
        record.update((column_name, '') for column_name in new_columns)
147
+
148
+ # Extracting the list to a variable for future refactoring:
149
def filter_rows(csv_data):
    """
    Removes, in place, rows without a Patient ID and rows whose 'Primary Insurance'
    is one of the excluded insurance names.

    Parameters:
    csv_data (list of dict): The CSV data; the same list object is updated.
    """
    # TODO: This should be handled in the crosswalk.
    excluded_insurance = {'AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'}

    def _keep(record):
        if not record.get('Patient ID'):
            return False
        return record.get('Primary Insurance') not in excluded_insurance

    # Slice assignment keeps the caller's list object and only swaps its contents.
    csv_data[:] = list(filter(_keep, csv_data))
153
+
154
def convert_surgery_date(csv_data):
    """
    Converts each row's 'Surgery Date' from an 'MM/DD/YYYY' string to a datetime, in place.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
    None: Each row's 'Surgery Date' ends up as a datetime. Missing, blank or
        unparseable values become datetime.min, which the rest of this module
        treats as the "no valid date" sentinel.
    """
    for row in csv_data:
        raw_value = row.get('Surgery Date', '')

        # Already converted (e.g. this function ran on the same rows before): keep the
        # value instead of letting strptime raise TypeError on a datetime.
        if isinstance(raw_value, datetime):
            continue

        # Cleaning step for damaged data: padding whitespace would otherwise make
        # strptime reject a valid date.
        surgery_date_str = raw_value.strip() if isinstance(raw_value, str) else raw_value

        if not surgery_date_str:
            MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if empty
            print("Surgery Date not found for row: {}".format(row))
            continue

        try:
            row['Surgery Date'] = datetime.strptime(surgery_date_str, '%m/%d/%Y')
        except (ValueError, TypeError) as e:
            # ValueError: text in the wrong format; TypeError: a non-string value.
            MediLink_ConfigLoader.log("Error parsing Surgery Date '{}': {} for row: {}".format(surgery_date_str, e, row), level="ERROR")
            row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if parsing fails
168
+
169
def sort_and_deduplicate(csv_data):
    """
    Keeps one row per Patient ID (the one with the earliest 'Surgery Date') and
    sorts the survivors by surgery date, then by stripped 'Patient Last'.

    Parameters:
    csv_data (list of dict): The CSV data; the same list object is updated in place.
        'Surgery Date' values must be mutually comparable (e.g. datetime objects).
    """
    earliest_by_patient = {}

    for row in csv_data:
        patient_key = row.get('Patient ID')
        incumbent = earliest_by_patient.get(patient_key)
        # First sighting of this patient, or a strictly earlier surgery than the one
        # kept so far. On a tie the row seen first stays.
        if incumbent is None or row['Surgery Date'] < incumbent['Surgery Date']:
            earliest_by_patient[patient_key] = row

    def _ordering(record):
        return (record['Surgery Date'], record.get('Patient Last', '').strip())

    survivors = list(earliest_by_patient.values())
    survivors.sort(key=_ordering)  # TODO Does this need to be sorted twice? once before and once after?
    csv_data[:] = survivors
186
+
187
+ # TODO: Consider adding an option in the config to sort based on Surgery Schedules when available.
188
+ # If no schedule is available, the current sorting strategy will be used.
189
+
190
def combine_fields(csv_data):
    """
    Builds the combined 'Patient Name' and 'Patient Street' fields and turns
    'Surgery Date' back into an 'MM/DD/YYYY' string, in place.

    Parameters:
    csv_data (list of dict): The CSV data where each row is represented as a dictionary.
        'Surgery Date' is expected to be a datetime (or a falsy value, which becomes '').

    Returns:
    None: Each row gains/overwrites:
        - 'Surgery Date': formatted date string, or '' when no date is present.
        - 'Patient Name': 'Last, First M', where multi-word first/last names are joined
          with underscores and M is the middle initial (omitted when there is none).
        - 'Patient Street': 'Patient Address1' and 'Patient Address2' joined by a space.
    """
    for row in csv_data:
        # Safely handle the 'Surgery Date' conversion
        surgery_date = row.get('Surgery Date')
        row['Surgery Date'] = surgery_date.strftime('%m/%d/%Y') if surgery_date else ''

        # split() already discards surrounding and repeated whitespace.
        first_name = '_'.join(row.get('Patient First', '').split())
        last_name = '_'.join(row.get('Patient Last', '').split())

        # Take only the first character or empty. Slicing keeps a one-letter middle
        # initial such as 'J', which the previous `len(...) > 1` test discarded.
        middle_initial = row.get('Patient Middle', '').strip()[:1]

        # Comma between last and first, space before middle
        full_name = ', '.join(filter(None, [last_name, first_name]))
        if middle_initial:
            full_name += ' ' + middle_initial
        row['Patient Name'] = full_name

        address1 = row.get('Patient Address1', '').strip()
        address2 = row.get('Patient Address2', '').strip()
        row['Patient Street'] = ' '.join(filter(None, [address1, address2]))  # Join non-empty addresses
205
+
206
def apply_replacements(csv_data, crosswalk):
    """
    Applies the crosswalk's 'csv_replacements' (old value -> new value) to each row.

    For every replacement pair, the columns 'Patient SSN', 'Primary Insurance' and
    'Ins1 Payer ID' are checked in that order, and only the first column whose value
    equals the old value is rewritten.

    Parameters:
    csv_data (list of dict): The CSV data; rows are modified in place.
    crosswalk (dict): Crosswalk data; a missing 'csv_replacements' key means no replacements.
    """
    replacements = crosswalk.get('csv_replacements', {})
    candidate_columns = ('Patient SSN', 'Primary Insurance', 'Ins1 Payer ID')

    for record in csv_data:
        for stale_value, fresh_value in replacements.items():
            # At most one column is rewritten per replacement pair.
            matched_column = next(
                (column for column in candidate_columns if record.get(column) == stale_value),
                None
            )
            if matched_column is not None:
                record[matched_column] = fresh_value
214
+
215
+ import difflib
216
+ from collections import defaultdict
217
+
218
def find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names):
    """
    Finds the best matching Medisoft ID for a given insurance name using fuzzy matching.

    Names are compared with digits removed and letters uppercased, using
    difflib.SequenceMatcher's similarity ratio.

    Parameters:
    - insurance_name (str): The insurance name from the CSV row.
    - medisoft_ids (list): List of Medisoft IDs associated with the Payer ID.
    - medisoft_to_mains_names (dict): Mapping from Medisoft ID to list of MAINS names.

    Returns:
    - The element of medisoft_ids whose MAINS name scored highest, returned exactly as
      it appears in the list (it is not converted to int), or None when no candidate
      scored above zero. On equal scores the candidate seen first wins. No minimum
      threshold is applied.
    """
    def _normalize(name):
        # Drop digits and uppercase so only the alphabetic part of a name is compared.
        return ''.join(ch for ch in name if not ch.isdigit()).upper()

    # The insurance side of the comparison never changes, so normalize it once instead
    # of once per MAINS name.
    processed_insurance = _normalize(insurance_name)

    best_match_ratio = 0
    best_medisoft_id = None

    for medisoft_id in medisoft_ids:
        for mains_name in medisoft_to_mains_names.get(medisoft_id, []):
            processed_mains = _normalize(mains_name)

            # Log the processed names before computing the match ratio
            MediLink_ConfigLoader.log("Processing Medisoft ID '{}': Comparing processed insurance '{}' with processed mains '{}'.".format(medisoft_id, processed_insurance, processed_mains), level="DEBUG")

            # Compute the similarity ratio
            match_ratio = difflib.SequenceMatcher(None, processed_insurance, processed_mains).ratio()

            # Log the match ratio
            MediLink_ConfigLoader.log("Match ratio for Medisoft ID '{}': {:.2f}".format(medisoft_id, match_ratio), level="DEBUG")

            if match_ratio > best_match_ratio:
                best_match_ratio = match_ratio
                best_medisoft_id = medisoft_id
                # Log the current best match
                MediLink_ConfigLoader.log("New best match found: Medisoft ID '{}' with match ratio {:.2f}".format(best_medisoft_id, best_match_ratio), level="DEBUG")

    # Log the final best match ratio and ID
    MediLink_ConfigLoader.log("Final best match ratio: {:.2f} for Medisoft ID '{}'".format(best_match_ratio, best_medisoft_id), level="DEBUG")

    # No threshold applied, return the best match found
    return best_medisoft_id
260
+
261
def NEW_update_insurance_ids(csv_data, config, crosswalk):
    """
    Updates the 'Ins1 Insurance ID' field in each row of csv_data based on the crosswalk and MAINS data.

    For each row with a non-blank 'Ins1 Payer ID':
    - no Medisoft IDs in the crosswalk: a placeholder crosswalk entry is ensured and
      the row is left untouched;
    - exactly one Medisoft ID: it is converted to int and assigned (None if it is
      not numeric);
    - several Medisoft IDs: the row's 'Primary Insurance' is fuzzy-matched against the
      MAINS names of each candidate; if nothing matches, the first ID is used.

    Parameters:
    - csv_data (list of dict): The CSV data where each row is represented as a dictionary.
    - config (dict): Configuration object containing necessary paths and parameters.
    - crosswalk (dict): Crosswalk data containing mappings between Payer IDs and Medisoft IDs.
      May gain placeholder entries under 'payer_id'.

    Returns:
    - None: The function modifies the csv_data (and possibly crosswalk) in place.
    """
    processed_payer_ids = set()  # Track processed Payer IDs (used for logging only)
    MediLink_ConfigLoader.log("Starting update of insurance IDs.", level="INFO")

    # Load MAINS data to get mapping from Medisoft ID to MAINS names
    insurance_to_id = load_insurance_data_from_mains(config)  # Assuming it returns a dict mapping insurance names to IDs
    MediLink_ConfigLoader.log("Loaded MAINS data for insurance to ID mapping.", level="DEBUG")

    # Invert the mapping to get Medisoft ID to MAINS names
    medisoft_to_mains_names = defaultdict(list)
    for insurance_name, medisoft_id in insurance_to_id.items():
        medisoft_to_mains_names[medisoft_id].append(insurance_name)

    # enumerate gives the true row position; csv_data.index(row) was O(n) per row and
    # reported the first equal row's position whenever two rows compared equal.
    for row_num, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row with Ins1 Payer ID: '{}'.".format(ins1_payer_id), level="DEBUG")

        if not ins1_payer_id:
            continue  # Nothing to look up for rows without a payer ID

        # Mark this Payer ID as processed
        if ins1_payer_id not in processed_payer_ids:
            processed_payer_ids.add(ins1_payer_id)  # Add to set
            MediLink_ConfigLoader.log("Marked Payer ID '{}' as processed.".format(ins1_payer_id), level="DEBUG")

        # Retrieve Medisoft IDs for the current Payer ID
        medisoft_ids = crosswalk.get('payer_id', {}).get(ins1_payer_id, {}).get('medisoft_id', [])
        MediLink_ConfigLoader.log("Retrieved Medisoft IDs for Payer ID '{}': {}".format(ins1_payer_id, medisoft_ids), level="DEBUG")

        if not medisoft_ids:
            MediLink_ConfigLoader.log("No Medisoft IDs available for Payer ID '{}', creating placeholder entry.".format(ins1_payer_id), level="WARNING")
            # Ensure a placeholder entry exists WITHOUT discarding data already recorded
            # for this payer (e.g. an endpoint stored alongside an empty ID list).
            payer_entry = crosswalk.setdefault('payer_id', {}).setdefault(ins1_payer_id, {})
            if not payer_entry.get('medisoft_id'):
                payer_entry['medisoft_id'] = []  # Placeholder for future Medisoft IDs
            payer_entry.setdefault('medisoft_medicare_id', [])  # Placeholder for future Medicare IDs
            payer_entry.setdefault('endpoint', None)  # Placeholder for future endpoint
            continue  # Skip further processing for this Payer ID

        # If only one Medisoft ID is associated, assign it directly
        if len(medisoft_ids) == 1:
            try:
                medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = medisoft_id
                MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, row_num, ins1_payer_id), level="DEBUG")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
            continue  # Move to the next row

        # If multiple Medisoft IDs are associated, perform fuzzy matching
        insurance_name = row.get('Primary Insurance', '').strip()
        if not insurance_name:
            MediLink_ConfigLoader.log("Row with Payer ID '{}' missing 'Primary Insurance', skipping assignment.".format(ins1_payer_id), level="WARNING")
            continue  # Skip if insurance name is missing

        best_medisoft_id = find_best_medisoft_id(insurance_name, medisoft_ids, medisoft_to_mains_names)

        if best_medisoft_id:
            # NOTE(review): unlike the other branches, this value is stored as it appears
            # in the crosswalk (not converted to int) - confirm what downstream expects.
            row['Ins1 Insurance ID'] = best_medisoft_id
            MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Payer ID '{}' based on fuzzy match.".format(best_medisoft_id, ins1_payer_id), level="INFO")
        else:
            # Default to the first Medisoft ID if no good match is found
            try:
                default_medisoft_id = int(medisoft_ids[0])
                row['Ins1 Insurance ID'] = default_medisoft_id
                MediLink_ConfigLoader.log("No suitable match found. Defaulted to Medisoft ID '{}' for Payer ID '{}'.".format(default_medisoft_id, ins1_payer_id), level="INFO")
            except ValueError as e:
                MediLink_ConfigLoader.log("Error converting default Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
                row['Ins1 Insurance ID'] = None
342
+
343
def update_insurance_ids(csv_data, config, crosswalk):
    """
    Sets each row's 'Ins1 Insurance ID' to the first Medisoft ID the crosswalk lists
    for the row's 'Ins1 Payer ID'.

    Parameters:
    - csv_data (list of dict): The CSV data; rows are modified in place.
    - config (dict): Configuration object. Not read by this function.
    - crosswalk (dict): Crosswalk data. Payer IDs found in csv_data but missing from
      crosswalk['payer_id'] get a placeholder entry added.

    Returns:
    - None: 'Ins1 Insurance ID' becomes an int, or None when the payer has no usable
      Medisoft ID (no entry, empty list, or a first ID that is not numeric).
    """
    MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")

    # Create a dictionary to hold Medisoft IDs for each payer ID in the crosswalk
    payer_id_to_medisoft = {}
    MediLink_ConfigLoader.log("Initialized payer_id_to_medisoft dictionary.", level="DEBUG")

    # Populate the dictionary with data from the crosswalk
    for payer_id, details in crosswalk.get('payer_id', {}).items():
        # Filter out empty strings and take the first valid ID
        medisoft_ids = [candidate for candidate in (details.get('medisoft_id') or []) if candidate]

        first_medisoft_id = None
        if medisoft_ids:
            try:
                first_medisoft_id = int(medisoft_ids[0])
            except (ValueError, TypeError) as e:
                # One malformed crosswalk entry must not abort the whole update; the
                # affected rows simply get no insurance ID.
                MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], payer_id, e), level="ERROR")

        payer_id_to_medisoft[payer_id] = first_medisoft_id
        MediLink_ConfigLoader.log("Processed Payer ID '{}': Medisoft IDs found: {}".format(payer_id, medisoft_ids), level="DEBUG")

    # Process the csv_data. enumerate gives the true row position; csv_data.index(row)
    # was O(n) per row and wrong whenever two rows compared equal.
    for row_num, row in enumerate(csv_data, start=1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_num, ins1_payer_id), level="DEBUG")

        if ins1_payer_id not in payer_id_to_medisoft:
            # Add placeholder entry for new payer ID
            payer_id_to_medisoft[ins1_payer_id] = None  # No Medisoft ID available
            crosswalk.setdefault('payer_id', {})[ins1_payer_id] = {
                'medisoft_id': [],  # Placeholder for future Medisoft IDs
                'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                'endpoint': None  # Placeholder for future endpoint
            }
            MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")

        # Assign the Medisoft ID to the row
        row['Ins1 Insurance ID'] = payer_id_to_medisoft[ins1_payer_id]
        MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row with Ins1 Payer ID '{}'.".format(row['Ins1 Insurance ID'], ins1_payer_id), level="DEBUG")
376
+
377
def update_procedure_codes(csv_data, crosswalk):
    """
    Fills each row's 'Procedure Code' from its 'Default Diagnosis #1' shorthand.

    The shorthand is translated back to a diagnosis code via the reversed
    'diagnosis_to_medisoft' crosswalk table, and the diagnosis code is then looked up
    in the reversed 'procedure_to_diagnosis' table. Rows for which either lookup
    yields nothing get the literal "Unknown".

    Parameters:
    - csv_data (list of dict): The CSV data; rows are modified in place.
    - crosswalk (dict): Crosswalk data providing both lookup tables.

    Returns:
    - bool: Always True. A row that raises is logged and left without an update.
    """
    # BUG We need to be careful here in case we decide we need to change the crosswalk data specifically with regard to the T8/H usage.
    medisoft_to_diagnosis = {
        shorthand: diagnosis
        for diagnosis, shorthand in crosswalk.get('diagnosis_to_medisoft', {}).items()
    }

    # Flip procedure -> [diagnoses] into diagnosis -> procedure for direct lookup.
    diagnosis_to_procedure = {}
    for procedure, diagnoses in crosswalk.get('procedure_to_diagnosis', {}).items():
        for diagnosis in diagnoses:
            diagnosis_to_procedure[diagnosis] = procedure

    updated_count = 0

    for row_num, row in enumerate(csv_data, start=1):
        try:
            shorthand = row.get('Default Diagnosis #1', '').strip()
            diagnosis = medisoft_to_diagnosis.get(shorthand)
            procedure = diagnosis_to_procedure.get(diagnosis) if diagnosis else None

            if procedure:
                row['Procedure Code'] = procedure
                updated_count += 1
            else:
                row['Procedure Code'] = "Unknown"  # Or handle as appropriate
        except Exception as e:
            MediLink_ConfigLoader.log("In update_procedure_codes, Error processing row {}: {}".format(row_num, e), level="ERROR")

    # Log total count of updated rows
    MediLink_ConfigLoader.log("Total {} 'Procedure Code' rows updated.".format(updated_count), level="INFO")

    return True
414
+
415
def _default_medisoft_shorthand(diagnosis_code):
    """Derives a fallback Medisoft shorthand for a diagnosis code absent from the crosswalk."""
    shorthand = diagnosis_code
    # Remove the literal prefixes 'H' and then 'T8'. str.lstrip() must not be used here:
    # it strips any run of the given characters, so 'H81.01' lost its leading '8' too.
    for prefix in ('H', 'T8'):
        if shorthand.startswith(prefix):
            shorthand = shorthand[len(prefix):]
    # Drop the dots and keep at most the last five characters.
    return shorthand.replace('.', '')[-5:]


def update_diagnosis_codes(csv_data):
    """
    Updates 'Default Diagnosis #1' in csv_data from diagnosis data parsed out of DOCX files.

    Steps:
    1. Load configuration and crosswalk, and convert the rows' surgery dates.
    2. Select the '.docx' files in the configured local storage path whose modification
       time lies within 8 days of the earliest/latest valid surgery date.
    3. Parse those files with parse_docx() and merge the results per patient and
       date of service.
    4. For each row whose Patient ID and surgery date (formatted 'MM-DD-YYYY') appear in
       the parsed data, store the Medisoft shorthand of the diagnosis code: the crosswalk
       value when present, otherwise a shorthand derived from the code itself.

    Parameters:
    csv_data (list of dict): The CSV data; rows are modified in place.

    Returns:
    None: Any exception raised along the way is logged and printed, not propagated.
    """
    try:
        # Load configuration and crosswalk
        config, crosswalk = MediLink_ConfigLoader.load_configuration()

        # Extract the local storage path from the configuration
        local_storage_path = config['MediLink_Config']['local_storage_path']

        # Initialize a dictionary to hold diagnosis codes from all DOCX files
        all_patient_data = {}

        # Convert surgery dates in CSV data
        convert_surgery_date(csv_data)

        # Extract all valid surgery dates from csv_data (datetime.min marks "no valid date")
        surgery_dates = [row['Surgery Date'] for row in csv_data if row['Surgery Date'] != datetime.min]

        if not surgery_dates:
            raise ValueError("No valid surgery dates found in csv_data.")

        # Apply a +/-8-day margin to the surgery dates... Increased from 5 days.
        margin = timedelta(days=8)
        threshold_start = min(surgery_dates) - margin
        threshold_end = max(surgery_dates) + margin

        # TODO (Low) This is a bad idea. We need a better way to handle this because it leaves
        # us with a situation where if we take 'too long' to download the DOCX files, it will presume that the DOCX files are out of range because
        # the modified date is a bad proxy for the date of the surgery which would be contained inside the DOCX file. The processing overhead for extracting the
        # date of the surgery from the DOCX file is non-trivial and computationally expensive so we need a smarter way to handle this.

        MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")

        # Gather all relevant DOCX files in the specified directory
        docx_files = [
            os.path.join(local_storage_path, filename)
            for filename in os.listdir(local_storage_path)
            if filename.endswith(".docx")
        ]

        # Filter files based on modification time
        valid_files = [
            filepath for filepath in docx_files
            if threshold_start <= datetime.fromtimestamp(os.path.getmtime(filepath)) <= threshold_end
        ]

        # Process valid DOCX files
        for filepath in valid_files:
            MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
            try:
                patient_data = parse_docx(filepath, surgery_dates)  # Pass surgery_dates to parse_docx
                for patient_id, service_dates in patient_data.items():
                    # Later files overwrite earlier ones for the same patient and date.
                    patient_entry = all_patient_data.setdefault(patient_id, {})
                    for date_of_service, diagnosis_data in service_dates.items():
                        patient_entry[date_of_service] = diagnosis_data
            except Exception as e:
                # One unreadable document must not stop the remaining files.
                MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")

        # Log if no valid files were found
        if not valid_files:
            MediLink_ConfigLoader.log("No valid DOCX files found within the modification time threshold.", level="INFO")

        # Debug logging for all_patient_data
        MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")

        # Extract patient IDs from csv_data for efficient matching
        patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}

        # Check if any patient data was collected
        if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
            MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
            return  # Exit the function early if no data is available

        # Get Medisoft shorthand dictionary from crosswalk.
        diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})

        # Initialize counter for updated rows
        updated_count = 0

        # Update the "Default Diagnosis #1" column in the CSV data
        for row_num, row in enumerate(csv_data, start=1):
            patient_id = row.get('Patient ID', '').strip()

            MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
            surgery_date = row.get('Surgery Date', '')

            # Convert surgery_date to string format for lookup
            if surgery_date != datetime.min:
                surgery_date_str = surgery_date.strftime("%m-%d-%Y")
            else:
                surgery_date_str = ''

            MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

            if patient_id not in all_patient_data:
                MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")
                continue

            if surgery_date_str not in all_patient_data[patient_id]:
                MediLink_ConfigLoader.log("No matching surgery date found for Patient ID: {} in row {}.".format(patient_id, row_num), level="INFO")
                continue

            # Only the diagnosis code is used here; the eye and femto values are unpacked
            # to keep the three-element shape check.
            diagnosis_code, _left_or_right_eye, _femto_yes_or_no = all_patient_data[patient_id][surgery_date_str]
            MediLink_ConfigLoader.log("Found diagnosis data for Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")

            # Convert diagnosis code to Medisoft shorthand format.
            medisoft_shorthand = diagnosis_to_medisoft.get(diagnosis_code, None)
            if medisoft_shorthand is None and diagnosis_code:
                medisoft_shorthand = _default_medisoft_shorthand(diagnosis_code)
            MediLink_ConfigLoader.log("Converted diagnosis code to Medisoft shorthand: {}".format(medisoft_shorthand), level="DEBUG")

            row['Default Diagnosis #1'] = medisoft_shorthand
            updated_count += 1
            MediLink_ConfigLoader.log("Updated row number {} with new diagnosis code.".format(row_num), level="INFO")

        # Log total count of updated rows
        MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")

    except Exception as e:
        message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
        MediLink_ConfigLoader.log(message, level="ERROR")
        print(message)
542
+
543
def load_data_sources(config, crosswalk):
    """
    Loads the two historical lookups: MAPAT (Patient ID -> Insurance ID) and
    Carol's CSVs (Payer ID -> Patient IDs).

    Returns:
        tuple: (patient_id_to_insurance_id, payer_id_to_patient_ids)

    Raises:
        ValueError: If either source comes back empty. MAPAT is loaded and checked
            first, so the CSVs are not read when MAPAT yields nothing.
    """
    def _require(loaded, failure_message):
        # An empty result from a loader is treated as a failed load.
        if not loaded:
            raise ValueError(failure_message)
        return loaded

    mapat_lookup = _require(
        load_insurance_data_from_mapat(config, crosswalk),
        "Failed to load historical Patient ID to Insurance ID mappings from MAPAT."
    )
    csv_lookup = _require(
        load_historical_payer_to_patient_mappings(config),
        "Failed to load historical Carol's CSVs."
    )
    return mapat_lookup, csv_lookup
554
+
555
def map_payer_ids_to_insurance_ids(patient_id_to_insurance_id, payer_id_to_patient_ids):
    """
    Maps Payer IDs to Insurance IDs based on the historical mappings.

    Args:
        patient_id_to_insurance_id (dict): Maps a Patient ID to its insurance (Medisoft) ID.
        payer_id_to_patient_ids (dict): Maps a Payer ID to an iterable of Patient IDs.

    Returns:
        dict: Maps each Payer ID that resolved to at least one insurance ID to a
        details dict with the keys "endpoint", "medisoft_id" and
        "medisoft_medicare_id". Payer IDs with no resolvable patient are omitted.
        "medisoft_id" is a sorted list so the output is identical from run to run.
    """
    payer_id_to_details = {}
    for payer_id, patient_ids in payer_id_to_patient_ids.items():
        # A set de-duplicates insurance IDs shared by several patients of one payer.
        medisoft_ids = set()
        for patient_id in patient_ids:
            if patient_id in patient_id_to_insurance_id:
                medisoft_id = patient_id_to_insurance_id[patient_id]
                medisoft_ids.add(medisoft_id)
                MediLink_ConfigLoader.log("Added Medisoft ID {} for Patient ID {} and Payer ID {}".format(medisoft_id, patient_id, payer_id))
            else:
                MediLink_ConfigLoader.log("No matching Insurance ID found for Patient ID {}".format(patient_id))
        if medisoft_ids:
            payer_id_to_details[payer_id] = {
                "endpoint": "OPTUMEDI",  # TODO Default, to be refined via API poll. There are 2 of these defaults!
                # Set iteration order is arbitrary; sort so the generated entry
                # is stable. key=str keeps the sort safe if ID types are mixed.
                "medisoft_id": sorted(medisoft_ids, key=str),
                "medisoft_medicare_id": []  # Placeholder for future implementation
            }
    return payer_id_to_details
574
+
575
def load_insurance_data_from_mains(config):
    """
    Build a lookup from insurance name to insurance ID out of the MAINS file.

    The crosswalk update process uses this mapping to associate payer IDs
    with insurance IDs.

    Args:
        config (dict): Configuration object. Note that it is replaced by a
            freshly loaded configuration before it is read.

    Returns:
        dict: A dictionary mapping insurance names to insurance IDs.
    """
    # Reload the full configuration so the MediLink config key subset is not used.
    config, crosswalk = MediLink_ConfigLoader.load_configuration()

    # TODO (Low) For secondary insurance, this needs to be pulling from the correct MAINS (there are 2)
    # TODO (Low) Performance: There probably needs to be a dictionary proxy for MAINS that gets updated.
    # Meh, this just has to be part of the new architecture plan where we make Medisoft a downstream
    # recipient from the db.
    # TODO (High) The Medisoft Medicare flag needs to be brought in here.
    mains_path = config['MAINS_MED_PATH']
    mains_slices = crosswalk['mains_mapping']['slices']

    # Each item from the fixed-width reader is a (record, line_number) pair.
    mains_records = MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices)

    # The line number is used as the insurance ID without adjustment (assumed
    # correct by the original author). A repeated name keeps its last line number.
    return {
        record['MAINSNAME']: line_number
        for record, line_number in mains_records
    }
608
+
609
def load_insurance_data_from_mapat(config, crosswalk):
    """
    Build a lookup from patient ID to insurance ID out of the MAPAT file.

    Args:
        config (dict): Configuration object. Not read by this function; the
            MAPAT path comes from app_control instead.
        crosswalk (dict): Crosswalk object; must provide the fixed-width slice
            definitions under crosswalk['mapat_mapping']['slices'].

    Returns:
        dict: A dictionary mapping patient IDs to insurance IDs.
    """
    mapat_path = app_control.get_mapat_med_path()
    mapat_slices = crosswalk['mapat_mapping']['slices']

    # The second element of each yielded pair is not needed here.
    mapat_records = MediLink_DataMgmt.read_general_fixed_width_data(mapat_path, mapat_slices)

    # A patient ID that appears more than once keeps its last insurance ID.
    return {
        record['MAPATPXID']: record['MAPATINID']
        for record, _ in mapat_records
    }
634
+
635
def parse_z_dat(z_dat_path, config): # Why is this in MediBot and not MediLink?
    """
    Parses the Z.dat file to map Patient IDs to Insurance Names using the provided fixed-width file format.

    Failures are logged rather than raised, so the caller always receives a
    dictionary (possibly empty or partially filled).

    Args:
        z_dat_path (str): Path to the Z.dat file.
        config (dict): Configuration object containing slicing information and other parameters.
            If it has a 'MediLink_Config' key, that sub-dictionary is handed to the parser;
            otherwise the object itself is used.

    Returns:
        dict: A dictionary mapping Patient IDs to Insurance Names.
    """
    patient_id_to_insurance_name = {}

    try:
        # Reading blocks of fixed-width data (up to 5 lines per record)
        for personal_info, insurance_info, service_info, service_info_2, service_info_3 in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
            # Parsing the data using slice definitions from the config
            parsed_data = MediLink_DataMgmt.parse_fixed_width_data(
                personal_info, insurance_info, service_info, service_info_2, service_info_3,
                config.get('MediLink_Config', config)
            )

            # Extract Patient ID and Insurance Name from parsed data
            patient_id = parsed_data.get('PATID')
            insurance_name = parsed_data.get('INAME')

            # Records missing either value are skipped.
            if patient_id and insurance_name:
                patient_id_to_insurance_name[patient_id] = insurance_name
                MediLink_ConfigLoader.log("Mapped Patient ID {} to Insurance Name {}".format(patient_id, insurance_name), config, level="INFO")

    except FileNotFoundError:
        # Kept at INFO as before. NOTE(review): presumably a missing Z.dat is an
        # expected condition for some runs - confirm with the caller.
        MediLink_ConfigLoader.log("File not found: {}".format(z_dat_path), config, level="INFO")
    except Exception as e:
        # An unexpected parse failure is a real error; log it at ERROR like the
        # other exception handlers in this module so it is not lost among INFO lines.
        MediLink_ConfigLoader.log("Failed to parse Z.dat: {}".format(str(e)), config, level="ERROR")

    return patient_id_to_insurance_name
668
+
669
def load_historical_payer_to_patient_mappings(config):
    """
    Loads historical mappings from multiple Carol's CSV files in a specified directory,
    mapping Payer IDs to sets of Patient IDs.

    Every '*.csv' file in the directory that contains config['CSV_FILE_PATH'] is read.
    A row contributes a mapping only when both its 'Patient ID' and 'Ins1 Payer ID'
    values are present and non-blank; all other rows are skipped. A failure in
    one file is reported and does not stop the remaining files from being read.

    Args:
        config (dict): Configuration object containing the directory path for Carol's CSV files
            and other necessary parameters. Only 'CSV_FILE_PATH' is read here.

    Returns:
        dict: A dictionary where each key is a Payer ID and the value is a set of Patient IDs.
        Empty when the directory is missing or no valid rows were found.
    """
    directory_path = os.path.dirname(config['CSV_FILE_PATH'])
    payer_to_patient_ids = defaultdict(set)

    try:
        # Check if the directory exists
        if not os.path.isdir(directory_path):
            raise FileNotFoundError("Directory '{}' not found.".format(directory_path))

        # Loop through each file in the directory containing Carol's historical CSVs
        for filename in os.listdir(directory_path):
            if not filename.endswith('.csv'):
                continue
            file_path = os.path.join(directory_path, filename)
            try:
                # 'utf-8-sig' drops a leading BOM (common in spreadsheet exports) so it
                # cannot end up glued to the first header name; plain UTF-8 is unaffected.
                # newline='' lets the csv module handle line endings itself.
                with open(file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
                    patient_count = 0  # Counter for Patient IDs found in this CSV
                    for row in csv.DictReader(csvfile):
                        # DictReader fills the missing columns of a short row with None,
                        # so normalise to '' before stripping instead of letting
                        # None.strip() abort the rest of the file.
                        patient_id = (row.get('Patient ID') or '').strip()
                        payer_id = (row.get('Ins1 Payer ID') or '').strip()
                        if not patient_id or not payer_id:
                            continue  # Skip rows where either value is missing or empty

                        payer_to_patient_ids[payer_id].add(patient_id)
                        patient_count += 1  # Increment the counter for each valid mapping

                # Log the accumulated count for this CSV file
                if patient_count > 0:
                    MediLink_ConfigLoader.log("CSV file '{}' has {} Patient IDs with Payer IDs.".format(filename, patient_count), level="DEBUG")
                else:
                    MediLink_ConfigLoader.log("CSV file '{}' is empty or does not have valid Patient ID or Payer ID mappings.".format(filename), level="DEBUG")
            except Exception as e:
                # Best effort: report the bad file and carry on with the next one.
                print("Error processing file {}: {}".format(filename, e))
                MediLink_ConfigLoader.log("Error processing file '{}': {}".format(filename, e), level="ERROR")
    except FileNotFoundError as e:
        print("Error: {}".format(e))

    if not payer_to_patient_ids:
        print("No historical mappings were generated.")

    # Return a plain dict so lookups by callers do not create empty entries.
    return dict(payer_to_patient_ids)
723
+
724
def capitalize_all_fields(csv_data):
    """
    Upper-case every field of every row, in place.

    String values are upper-cased directly. None values and datetime objects
    are left untouched. Any other value is converted with str() and the
    resulting text is upper-cased.

    Parameters:
        csv_data (list of dict): The CSV data where each row is represented as a dictionary.

    Returns:
        None: The function modifies the csv_data in place.
    """
    for record in csv_data:
        for field, current in record.items():
            # Nothing to do for missing values, and datetimes must stay datetimes.
            if current is None or isinstance(current, datetime):
                continue
            text = current if isinstance(current, str) else str(current)
            # Only existing keys are reassigned, so the dict size never changes
            # while it is being iterated.
            record[field] = text.upper()