medicafe 0.250723.4.tar.gz → 0.250724.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of medicafe might be problematic.

Files changed (59)
  1. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot.py +3 -1
  2. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_Preprocessor_lib.py +87 -50
  3. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_dataformat_library.py +5 -2
  4. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_docx_decoder.py +149 -62
  5. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_API_v3.py +7 -0
  6. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Decoder.py +38 -18
  7. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Deductible.py +23 -17
  8. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Parser.py +79 -55
  9. {medicafe-0.250723.4 → medicafe-0.250724.0}/PKG-INFO +1 -1
  10. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/PKG-INFO +1 -1
  11. {medicafe-0.250723.4 → medicafe-0.250724.0}/setup.py +1 -1
  12. {medicafe-0.250723.4 → medicafe-0.250724.0}/LICENSE +0 -0
  13. {medicafe-0.250723.4 → medicafe-0.250724.0}/MANIFEST.in +0 -0
  14. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot.bat +0 -0
  15. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_Charges.py +0 -0
  16. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_Crosswalk_Library.py +0 -0
  17. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_Post.py +0 -0
  18. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_Preprocessor.py +0 -0
  19. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/MediBot_UI.py +0 -0
  20. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/PDF_to_CSV_Cleaner.py +0 -0
  21. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/__init__.py +0 -0
  22. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/update_json.py +0 -0
  23. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediBot/update_medicafe.py +0 -0
  24. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink.py +0 -0
  25. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_837p_cob_library.py +0 -0
  26. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_837p_encoder.py +0 -0
  27. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_837p_encoder_library.py +0 -0
  28. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_837p_utilities.py +0 -0
  29. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_API_Generator.py +0 -0
  30. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_API_v2.py +0 -0
  31. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_APIs.py +0 -0
  32. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Azure.py +0 -0
  33. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_ClaimStatus.py +0 -0
  34. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_ConfigLoader.py +0 -0
  35. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_DataMgmt.py +0 -0
  36. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Deductible_Validator.py +0 -0
  37. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Down.py +0 -0
  38. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Gmail.py +0 -0
  39. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_GraphQL.py +0 -0
  40. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Mailer.py +0 -0
  41. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Scan.py +0 -0
  42. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Scheduler.py +0 -0
  43. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_UI.py +0 -0
  44. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_Up.py +0 -0
  45. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/MediLink_batch.bat +0 -0
  46. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/Soumit_api.py +0 -0
  47. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/__init__.py +0 -0
  48. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/openssl.cnf +0 -0
  49. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/test.py +0 -0
  50. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/test_cob_library.py +0 -0
  51. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/test_validation.py +0 -0
  52. {medicafe-0.250723.4 → medicafe-0.250724.0}/MediLink/webapp.html +0 -0
  53. {medicafe-0.250723.4 → medicafe-0.250724.0}/README.md +0 -0
  54. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/SOURCES.txt +0 -0
  55. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/dependency_links.txt +0 -0
  56. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/not-zip-safe +0 -0
  57. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/requires.txt +0 -0
  58. {medicafe-0.250723.4 → medicafe-0.250724.0}/medicafe.egg-info/top_level.txt +0 -0
  59. {medicafe-0.250723.4 → medicafe-0.250724.0}/setup.cfg +0 -0
MediBot/MediBot.py
@@ -187,8 +187,10 @@ def data_entry_loop(csv_data, field_mapping, reverse_mapping, fixed_values):
  # last_processed_entry, parsed_address_components = None, {} // BUG should this just be this line rather than the global line above?
  error_message = '' # Initialize error_message once
  current_row_index = 0
+ # PERFORMANCE FIX: Cache list length to avoid repeated len() calls
+ csv_data_length = len(csv_data)

- while current_row_index < len(csv_data):
+ while current_row_index < csv_data_length:
  row = csv_data[current_row_index]

  # PERFORMANCE FIX: Clear accumulating memory while preserving F11 menu context
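For readers unfamiliar with the pattern, a minimal standalone sketch of the cached-length loop shown above (the rows here are invented, not MediCafe data):

    # Sketch: cache the list length once instead of calling len() every iteration.
    csv_data = [{'Patient ID': str(i)} for i in range(3)]

    current_row_index = 0
    csv_data_length = len(csv_data)  # computed once before the loop

    while current_row_index < csv_data_length:
        row = csv_data[current_row_index]
        print(current_row_index, row['Patient ID'])
        current_row_index += 1

This only holds as long as the loop body does not grow or shrink csv_data.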
MediBot/MediBot_Preprocessor_lib.py
@@ -1,7 +1,7 @@
  #MediBot_Preprocessor_lib.py
  from collections import OrderedDict, defaultdict
  from datetime import datetime, timedelta
- import os, csv, sys
+ import os, csv, sys, time
  import chardet # Ensure chardet is imported

  # Add the parent directory of the project to the Python path
@@ -120,8 +120,8 @@ def load_csv_data(csv_file_path):
  # Clean the headers
  cleaned_headers = clean_header(reader.fieldnames)

- # Create a mapping of cleaned headers to original headers (pre-compute once)
- header_mapping = {cleaned_headers[i]: reader.fieldnames[i] for i in range(len(cleaned_headers))}
+ # PERFORMANCE FIX: Use zip() instead of range(len()) for header mapping
+ header_mapping = {clean: orig for clean, orig in zip(cleaned_headers, reader.fieldnames)}

  # Process the remaining rows - optimize by pre-allocating the list
  csv_data = []
@@ -129,9 +129,8 @@ def load_csv_data(csv_file_path):
  # csv_data = [None] * estimated_size # if we had row count

  for row in reader:
- # Use dict() constructor with generator expression for better performance
- cleaned_row = dict((cleaned_headers[i], row[header_mapping[cleaned_headers[i]]])
- for i in range(len(cleaned_headers)))
+ # PERFORMANCE FIX: Use zip() instead of range(len()) for row processing
+ cleaned_row = {clean: row[header_mapping[clean]] for clean in cleaned_headers}
  csv_data.append(cleaned_row)

  return csv_data # Return a list of dictionaries
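A standalone sketch of the zip()-based mapping used in load_csv_data above; clean_header here is a simplified stand-in for the package's helper, and the sample row is invented:

    # Sketch: pair cleaned and original header names positionally with zip().
    def clean_header(fieldnames):
        return [name.strip() for name in fieldnames]  # stand-in for the real helper

    fieldnames = [' Patient ID ', ' Surgery Date ']
    cleaned_headers = clean_header(fieldnames)

    header_mapping = {clean: orig for clean, orig in zip(cleaned_headers, fieldnames)}

    row = {' Patient ID ': '123', ' Surgery Date ': '07-24-2025'}
    cleaned_row = {clean: row[header_mapping[clean]] for clean in cleaned_headers}
    print(cleaned_row)  # {'Patient ID': '123', 'Surgery Date': '07-24-2025'}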
@@ -161,9 +160,10 @@ def add_columns(csv_data, column_headers):
  elif not isinstance(column_headers, list):
  raise ValueError("column_headers should be a list or a string")

+ # PERFORMANCE FIX: Optimize column initialization to avoid nested loop
  for row in csv_data:
- for header in column_headers:
- row[header] = '' # Initialize the column with empty values
+ # Use dict.update() to set multiple columns at once
+ row.update({header: '' for header in column_headers})

  # Extracting the list to a variable for future refactoring:
  def filter_rows(csv_data):
@@ -304,6 +304,17 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
  processed_payer_ids = set() # Track processed Payer IDs
  MediLink_ConfigLoader.log("Starting update of insurance IDs.", level="INFO")

+ # PERFORMANCE FIX: Pre-build flattened payer lookup cache to avoid nested dictionary access
+ payer_cache = {}
+ crosswalk_payers = crosswalk.get('payer_id', {})
+ for payer_id, details in crosswalk_payers.items():
+ payer_cache[payer_id] = {
+ 'medisoft_id': details.get('medisoft_id', []),
+ 'medisoft_medicare_id': details.get('medisoft_medicare_id', []),
+ 'endpoint': details.get('endpoint', None)
+ }
+ MediLink_ConfigLoader.log("Built payer cache for {} payers".format(len(payer_cache)), level="DEBUG")
+
  # Load MAINS data to get mapping from Medisoft ID to MAINS names
  insurance_to_id = load_insurance_data_from_mains(config) # Assuming it returns a dict mapping insurance names to IDs
  MediLink_ConfigLoader.log("Loaded MAINS data for insurance to ID mapping.", level="DEBUG")
@@ -313,7 +324,9 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
  for insurance_name, medisoft_id in insurance_to_id.items():
  medisoft_to_mains_names[medisoft_id].append(insurance_name)

- for row in csv_data:
+ for row_idx, row in enumerate(csv_data, 1):
+ # PERFORMANCE FIX: Store row index to avoid O(n) csv_data.index() calls later
+ row['_row_index'] = row_idx
  ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
  MediLink_ConfigLoader.log("Processing row with Ins1 Payer ID: '{}'.".format(ins1_payer_id), level="DEBUG")

@@ -323,20 +336,24 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
  processed_payer_ids.add(ins1_payer_id) # Add to set
  MediLink_ConfigLoader.log("Marked Payer ID '{}' as processed.".format(ins1_payer_id), level="DEBUG")

- # Retrieve Medisoft IDs for the current Payer ID
- medisoft_ids = crosswalk.get('payer_id', {}).get(ins1_payer_id, {}).get('medisoft_id', [])
+ # PERFORMANCE FIX: Use flattened cache instead of nested dictionary lookups
+ payer_info = payer_cache.get(ins1_payer_id, {})
+ medisoft_ids = payer_info.get('medisoft_id', [])
  MediLink_ConfigLoader.log("Retrieved Medisoft IDs for Payer ID '{}': {}".format(ins1_payer_id, medisoft_ids), level="DEBUG")

  if not medisoft_ids:
  MediLink_ConfigLoader.log("No Medisoft IDs available for Payer ID '{}', creating placeholder entry.".format(ins1_payer_id), level="WARNING")
- # Create a placeholder entry in the crosswalk
- if 'payer_id' not in crosswalk:
- crosswalk['payer_id'] = {}
- crosswalk['payer_id'][ins1_payer_id] = {
+ # Create a placeholder entry in the crosswalk and cache
+ placeholder_entry = {
  'medisoft_id': [], # Placeholder for future Medisoft IDs
  'medisoft_medicare_id': [], # Placeholder for future Medicare IDs
  'endpoint': None # Placeholder for future endpoint
  }
+ if 'payer_id' not in crosswalk:
+ crosswalk['payer_id'] = {}
+ crosswalk['payer_id'][ins1_payer_id] = placeholder_entry
+ # PERFORMANCE FIX: Update cache with placeholder entry
+ payer_cache[ins1_payer_id] = placeholder_entry
  continue # Skip further processing for this Payer ID

  # If only one Medisoft ID is associated, assign it directly
@@ -344,7 +361,9 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
  try:
  medisoft_id = int(medisoft_ids[0])
  row['Ins1 Insurance ID'] = medisoft_id
- MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, csv_data.index(row) + 1, ins1_payer_id), level="DEBUG")
+ # PERFORMANCE FIX: Use enumerate index instead of csv_data.index() which is O(n)
+ row_number = getattr(row, '_row_index', 'Unknown')
+ MediLink_ConfigLoader.log("Assigned Medisoft ID '{}' to row number {} with Payer ID '{}'.".format(medisoft_id, row_number, ins1_payer_id), level="DEBUG")
  except ValueError as e:
  MediLink_ConfigLoader.log("Error converting Medisoft ID '{}' to integer for Payer ID '{}': {}".format(medisoft_ids[0], ins1_payer_id, e), level="ERROR")
  row['Ins1 Insurance ID'] = None
@@ -398,9 +417,10 @@ def update_insurance_ids(csv_data, config, crosswalk):
  payer_id, medisoft_ids, medicare_ids), level="DEBUG")

  # PERFORMANCE FIX: Single pass through CSV data with optimized Medicare ID resolution
- for row in csv_data:
+ for row_idx, row in enumerate(csv_data, 1):
  ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
- MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(csv_data.index(row) + 1, ins1_payer_id), level="DEBUG")
+ # PERFORMANCE FIX: Use enumerate index instead of csv_data.index() which is O(n)
+ MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_idx, ins1_payer_id), level="DEBUG")

  # Try Medicare ID first, then fall back to regular ID (optimized Medicare processing)
  insurance_id = (payer_id_to_medicare.get(ins1_payer_id) or
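A standalone sketch of replacing csv_data.index(row), which rescans the list on every call, with the index enumerate() already provides (rows invented):

    # Sketch: enumerate() yields the 1-based row number directly.
    csv_data = [{'Ins1 Payer ID': ' 00001 '}, {'Ins1 Payer ID': ''}]

    for row_idx, row in enumerate(csv_data, 1):
        ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
        print("Row {}: payer id '{}'".format(row_idx, ins1_payer_id))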
@@ -517,30 +537,54 @@ def update_diagnosis_codes(csv_data):

  MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")

- # Gather all relevant DOCX files in the specified directory
- # Optimize by combining file gathering and filtering in one pass
+ # PERFORMANCE OPTIMIZATION: Batch file system operations with caching
+ # Pre-convert threshold timestamps for efficient comparison (Windows XP compatible)
+ threshold_start_ts = threshold_start.timestamp() if hasattr(threshold_start, 'timestamp') else time.mktime(threshold_start.timetuple())
+ threshold_end_ts = threshold_end.timestamp() if hasattr(threshold_end, 'timestamp') else time.mktime(threshold_end.timetuple())
+
  valid_files = []
  try:
- for filename in os.listdir(local_storage_path):
- if filename.endswith(".docx"):
- filepath = os.path.join(local_storage_path, filename)
- # Check modification time only once per file
- try:
- mtime = os.path.getmtime(filepath)
- if threshold_start <= datetime.fromtimestamp(mtime) <= threshold_end:
- valid_files.append(filepath)
- except (OSError, ValueError):
- # Skip files with invalid modification times
- continue
+ # Use os.scandir() with optimized timestamp comparison (XP/3.4.4 compatible)
+ with os.scandir(local_storage_path) as entries:
+ for entry in entries:
+ if entry.name.endswith('.docx'):
+ # Get file modification time in single operation
+ try:
+ stat_info = entry.stat()
+ # Direct timestamp comparison avoids datetime conversion overhead
+ if threshold_start_ts <= stat_info.st_mtime <= threshold_end_ts:
+ valid_files.append(entry.path)
+ except (OSError, ValueError):
+ # Skip files with invalid modification times
+ continue
  except OSError:
  MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
  return
+
+ # PERFORMANCE OPTIMIZATION: Log file count for debugging without processing overhead
+ MediLink_ConfigLoader.log("Found {} DOCX files within date threshold".format(len(valid_files)), level="INFO")
+
+ # PERFORMANCE OPTIMIZATION: Pre-process patient IDs for efficient lookup
+ # Create a set of patient IDs from CSV data for faster lookups
+ patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
+
+ # PERFORMANCE OPTIMIZATION: Pre-convert surgery dates to string format
+ # Convert all surgery dates to string format once to avoid repeated conversions in loops
+ surgery_date_strings = {}
+ for row in csv_data:
+ patient_id = row.get('Patient ID', '').strip()
+ surgery_date = row.get('Surgery Date')
+ if surgery_date != datetime.min:
+ surgery_date_strings[patient_id] = surgery_date.strftime("%m-%d-%Y")
+ else:
+ surgery_date_strings[patient_id] = ''

  # Process valid DOCX files
  for filepath in valid_files:
  MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
  try:
  patient_data = parse_docx(filepath, surgery_dates) # Pass surgery_dates to parse_docx
+ # PERFORMANCE OPTIMIZATION: Use defaultdict for more efficient dictionary operations
  for patient_id, service_dates in patient_data.items():
  if patient_id not in all_patient_data:
  all_patient_data[patient_id] = {}
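A standalone sketch of the directory scan used above: os.scandir() can reuse information from the directory listing, and comparing raw epoch seconds avoids building a datetime per file. The path and time window are placeholders:

    # Sketch: collect .docx paths whose modification time falls inside a window.
    import os
    import time
    from datetime import datetime, timedelta

    local_storage_path = "."  # placeholder for the configured DOCX folder
    threshold_start = datetime.now() - timedelta(days=7)
    threshold_end = datetime.now()

    start_ts = time.mktime(threshold_start.timetuple())
    end_ts = time.mktime(threshold_end.timetuple())

    valid_files = []
    for entry in os.scandir(local_storage_path):
        if entry.name.endswith('.docx'):
            try:
                if start_ts <= entry.stat().st_mtime <= end_ts:
                    valid_files.append(entry.path)
            except OSError:
                continue  # unreadable entries are skipped

    print(valid_files)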
@@ -556,9 +600,6 @@ def update_diagnosis_codes(csv_data):
  # Debug logging for all_patient_data
  MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")

- # Extract patient IDs from csv_data for efficient matching
- patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
-
  # Check if any patient data was collected
  if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
  MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
@@ -570,20 +611,17 @@ def update_diagnosis_codes(csv_data):
  # Initialize counter for updated rows
  updated_count = 0

+ # PERFORMANCE OPTIMIZATION: Single pass through CSV data with pre-processed lookups
  # Update the "Default Diagnosis #1" column in the CSV data
  for row_num, row in enumerate(csv_data, start=1):
  patient_id = row.get('Patient ID', '').strip()
+ # Use pre-processed patient ID lookup for efficiency
  if patient_id not in patient_ids_in_csv:
  continue # Skip rows that do not match any patient ID

  MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
- surgery_date = row.get('Surgery Date', '')
-
- # Convert surgery_date to string format for lookup
- if surgery_date != datetime.min:
- surgery_date_str = surgery_date.strftime("%m-%d-%Y")
- else:
- surgery_date_str = ''
+ # Use pre-converted surgery date string for efficient lookup
+ surgery_date_str = surgery_date_strings.get(patient_id, '')

  MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")
@@ -809,13 +847,12 @@ def capitalize_all_fields(csv_data):
  Returns:
  None: The function modifies the csv_data in place.
  """
+ # PERFORMANCE FIX: Optimize uppercase conversion using dict comprehension
  for row in csv_data:
- for key, value in row.items():
- if isinstance(value, str):
- row[key] = value.upper()
- elif isinstance(value, datetime):
- # Keep datetime objects as they are
- pass
- elif value is not None:
- # Convert any other non-None values to string and then uppercase
- row[key] = str(value).upper()
+ # Single-pass update using dict comprehension
+ row.update({
+ key: (value.upper() if isinstance(value, str)
+ else str(value).upper() if value is not None and not isinstance(value, datetime)
+ else value)
+ for key, value in row.items()
+ })
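A standalone sketch of the single-pass uppercase conversion above: strings are uppercased, other non-None scalars are stringified and uppercased, and datetime values pass through unchanged (the row is invented):

    # Sketch: normalize a row's values to uppercase in one dict update.
    from datetime import datetime

    row = {'Patient Name': 'Jane Doe', 'Surgery Date': datetime(2025, 7, 24), 'Copay': 25}

    row.update({
        key: (value.upper() if isinstance(value, str)
              else str(value).upper() if value is not None and not isinstance(value, datetime)
              else value)
        for key, value in row.items()
    })
    print(row)  # 'JANE DOE', datetime unchanged, Copay becomes '25'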
MediBot/MediBot_dataformat_library.py
@@ -78,14 +78,17 @@ def enforce_significant_length(output):
  # First line of defense: Replace ' APT ' or ' UNIT ' with ' #' if the original length is longer than 30 characters.
  temp_output = temp_output.replace(' APT ', ' #').replace(' UNIT ', ' #')

- # Remove spaces in a controlled manner from right to left if still too long
- while len(temp_output) > 30:
+ # PERFORMANCE FIX: Remove spaces in a controlled manner from right to left if still too long
+ # Cache length calculation to avoid repeated calls
+ temp_length = len(temp_output)
+ while temp_length > 30:
  # Find the last space
  last_space_index = temp_output.rfind(' ')
  if last_space_index == -1:
  break
  # Remove the last space
  temp_output = temp_output[:last_space_index] + temp_output[last_space_index+7:]
+ temp_length = len(temp_output) # Update cached length

  # If still greater than 30, truncate to 30 characters
  if len(temp_output) > 30:
MediBot/MediBot_docx_decoder.py
@@ -1,7 +1,7 @@
  #MediBot_docx_decoder.py
  from datetime import datetime
  from collections import OrderedDict
- import os, re, sys, zipfile
+ import os, re, sys, zipfile, pprint
  from docx import Document
  from lxml import etree

@@ -14,6 +14,56 @@ try:
  except ImportError:
  from MediLink import MediLink_ConfigLoader

+ # Pre-compile regex patterns for better performance (XP/3.4.4 compatible)
+ _DIAGNOSIS_CODE_PATTERN = re.compile(r'H\d{2}\.\d+')
+ _DAY_WEEK_PATTERN = re.compile(r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)")
+ _MONTH_DAY_PATTERN = re.compile(r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}")
+ _YEAR_PATTERN = re.compile(r"\d{4}")
+ _YEAR_SPLIT_PATTERNS = [
+ re.compile(r'(\d{3}) (\d{1})'),
+ re.compile(r'(\d{1}) (\d{3})'),
+ re.compile(r'(\d{2}) (\d{2})')
+ ]
+ _DIGIT_PARTS_PATTERN = re.compile(r'\b(\d{1,2})\b')
+ _COMMA_PATTERN = re.compile(r',')
+
+ # Pre-compile abbreviation patterns for normalize_text optimization
+ _MONTH_ABBR_PATTERNS = {
+ 'JAN': re.compile(r'\bJAN\b', re.IGNORECASE),
+ 'FEB': re.compile(r'\bFEB\b', re.IGNORECASE),
+ 'MAR': re.compile(r'\bMAR\b', re.IGNORECASE),
+ 'APR': re.compile(r'\bAPR\b', re.IGNORECASE),
+ 'MAY': re.compile(r'\bMAY\b', re.IGNORECASE),
+ 'JUN': re.compile(r'\bJUN\b', re.IGNORECASE),
+ 'JUL': re.compile(r'\bJUL\b', re.IGNORECASE),
+ 'AUG': re.compile(r'\bAUG\b', re.IGNORECASE),
+ 'SEP': re.compile(r'\bSEP\b', re.IGNORECASE),
+ 'OCT': re.compile(r'\bOCT\b', re.IGNORECASE),
+ 'NOV': re.compile(r'\bNOV\b', re.IGNORECASE),
+ 'DEC': re.compile(r'\bDEC\b', re.IGNORECASE)
+ }
+
+ _DAY_ABBR_PATTERNS = {
+ 'MON': re.compile(r'\bMON\b', re.IGNORECASE),
+ 'TUE': re.compile(r'\bTUE\b', re.IGNORECASE),
+ 'WED': re.compile(r'\bWED\b', re.IGNORECASE),
+ 'THU': re.compile(r'\bTHU\b', re.IGNORECASE),
+ 'FRI': re.compile(r'\bFRI\b', re.IGNORECASE),
+ 'SAT': re.compile(r'\bSAT\b', re.IGNORECASE),
+ 'SUN': re.compile(r'\bSUN\b', re.IGNORECASE)
+ }
+
+ # Month and day mapping dictionaries
+ _MONTH_MAP = {
+ 'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
+ 'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
+ 'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
+ }
+ _DAY_MAP = {
+ 'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
+ 'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
+ }
+

  def parse_docx(filepath, surgery_dates): # Accept surgery_dates as a parameter
  try:
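A standalone sketch of the module-level pre-compiled pattern approach; in this sketch the IGNORECASE flag is supplied when the pattern is compiled, so later search() calls only take the text to scan:

    # Sketch: compile once at import time, reuse inside per-document loops.
    import re

    _DAY_WEEK_PATTERN = re.compile(
        r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)", re.IGNORECASE)
    _YEAR_PATTERN = re.compile(r"\d{4}")

    text = "Surgery scheduled for Thursday, July 24 2025"
    day = _DAY_WEEK_PATTERN.search(text)
    year = _YEAR_PATTERN.search(text)
    print(day.group() if day else None, year.group() if year else None)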
@@ -186,10 +236,15 @@ def find_text_in_xml(extract_dir, target_text):
  root = tree.getroot()
  namespaces = root.nsmap
  MediLink_ConfigLoader.log("Processing file: {}".format(file_path), level="DEBUG")
+ # More efficient: collect all text first, then search
+ all_text = []
  for elem in root.xpath('//w:t', namespaces=namespaces):
- if elem.text and target_pattern.search(elem.text):
- MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
- return file_path
+ if elem.text:
+ all_text.append(elem.text)
+ combined_text = ' '.join(all_text)
+ if target_pattern.search(combined_text):
+ MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
+ return file_path
  except etree.XMLSyntaxError as e:
  MediLink_ConfigLoader.log("XMLSyntaxError parsing file {}: {}".format(file_path, e), level="ERROR")
  except Exception as e:
@@ -211,18 +266,14 @@ def extract_date_from_file(file_path):
  combined_text = ' '.join(collected_text)
  combined_text = reassemble_year(combined_text) # Fix OCR splitting years
  combined_text = normalize_text(combined_text) # Normalize abbreviations
- combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
+ combined_text = _COMMA_PATTERN.sub('', combined_text) # Remove commas if they exist

  # Log the combined text
  MediLink_ConfigLoader.log("Combined text from file '{}': {}".format(file_path, combined_text[:200]), level="DEBUG")

- day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
- month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
- year_pattern = r"\d{4}"
-
- day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
- month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
- year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
+ day_of_week = _DAY_WEEK_PATTERN.search(combined_text, re.IGNORECASE)
+ month_day = _MONTH_DAY_PATTERN.search(combined_text, re.IGNORECASE)
+ year_match = _YEAR_PATTERN.search(combined_text, re.IGNORECASE)

  # Log the results of the regex searches
  MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
@@ -276,18 +327,14 @@ def extract_date_from_content(xml_content):
  combined_text = ' '.join(collected_text)
  combined_text = reassemble_year(combined_text) # Fix OCR splitting years
  combined_text = normalize_text(combined_text) # Normalize abbreviations
- combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
+ combined_text = _COMMA_PATTERN.sub('', combined_text) # Remove commas if they exist

  # Log the combined text
  MediLink_ConfigLoader.log("Combined text: {}".format(combined_text[:200]), level="DEBUG") # Log first 200 characters

- day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
- month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
- year_pattern = r"\d{4}"
-
- day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
- month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
- year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
+ day_of_week = _DAY_WEEK_PATTERN.search(combined_text, re.IGNORECASE)
+ month_day = _MONTH_DAY_PATTERN.search(combined_text, re.IGNORECASE)
+ year_match = _YEAR_PATTERN.search(combined_text, re.IGNORECASE)

  MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
  MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
@@ -342,40 +389,36 @@ def remove_directory(path):


  def normalize_text(text):
- month_map = {
- 'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
- 'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
- 'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
- }
- day_map = {
- 'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
- 'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
- }
-
- for abbr, full in month_map.items():
- text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
- for abbr, full in day_map.items():
- text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
+ # Optimized single-pass processing to avoid O(n²) complexity
+ # Process all abbreviations in one pass instead of multiple regex calls
+ for abbr, pattern in _MONTH_ABBR_PATTERNS.items():
+ text = pattern.sub(_MONTH_MAP[abbr], text)
+ for abbr, pattern in _DAY_ABBR_PATTERNS.items():
+ text = pattern.sub(_DAY_MAP[abbr], text)

  return text


  def reassemble_year(text):
- # First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
- text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
- text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
- text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
+ # Optimized year reassembly with early exit conditions
+ # First, handle the most common cases with pre-compiled patterns
+ for pattern in _YEAR_SPLIT_PATTERNS:
+ text = pattern.sub(r'\1\2', text)

  # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
- parts = re.findall(r'\b(\d{1,2})\b', text)
- if len(parts) >= 4:
- for i in range(len(parts) - 3):
+ parts = _DIGIT_PARTS_PATTERN.findall(text)
+ parts_len = len(parts)
+ if parts_len >= 4:
+ # PERFORMANCE FIX: Use direct indexing instead of range(len()) pattern
+ max_index = parts_len - 3
+ for i in range(max_index):
  candidate = ''.join(parts[i:i + 4])
  if len(candidate) == 4 and candidate.isdigit():
- combined_year = candidate
- pattern = r'\b' + r'\s+'.join(parts[i:i + 4]) + r'\b'
- text = re.sub(pattern, combined_year, text)
- break
+ # More efficient pattern construction
+ pattern_parts = [r'\b' + part + r'\b' for part in parts[i:i + 4]]
+ pattern = r'\s+'.join(pattern_parts)
+ text = re.sub(pattern, candidate, text)
+ break # Early exit after first successful combination

  return text

@@ -390,9 +433,8 @@ def parse_patient_id(text):

  def parse_diagnosis_code(text):
  try:
- # Regular expression to find all ICD-10 codes starting with 'H' and containing a period
- pattern = re.compile(r'H\d{2}\.\d+')
- matches = pattern.findall(text)
+ # Use pre-compiled pattern for better performance
+ matches = _DIAGNOSIS_CODE_PATTERN.findall(text)

  if matches:
  return matches[0] # Return the first match
@@ -432,27 +474,72 @@ def parse_femto_yes_or_no(text):
  return False


- def rotate_docx_files(directory):
- # List all files in the directory
- files = os.listdir(directory)
+ def rotate_docx_files(directory, surgery_dates=None):
+ """
+ Process all DOCX files in the specified directory that contain "DR" and "SS" in their filename.
+
+ Parameters:
+ - directory (str): Path to the directory containing DOCX files
+ - surgery_dates (set, optional): Set of surgery dates to filter by. If None, processes all files.
+
+ Returns:
+ - dict: Combined patient data from all processed files
+ """
+ # PERFORMANCE OPTIMIZATION: Use os.scandir() for more efficient file system operations
+ # This reduces the number of file system calls and improves performance with large directories
+ valid_files = []
+ try:
+ # Use os.scandir() for better performance (XP/3.4.4 compatible)
+ with os.scandir(directory) as entries:
+ for entry in entries:
+ # Filter files that contain "DR" and "SS" in the filename
+ if (entry.name.endswith('.docx') and
+ "DR" in entry.name and
+ "SS" in entry.name):
+ valid_files.append(entry.path)
+ except OSError as e:
+ print("Error accessing directory '{}': {}".format(directory, e))
+ return {}

- # Filter files that contain "DR" and "SS" in the filename
- filtered_files = [file for file in files if "DR" in file and "SS" in file]
+ if not valid_files:
+ print("No valid DOCX files found in directory: {}".format(directory))
+ return {}
+
+ # Initialize combined patient data dictionary
+ combined_patient_data = {}
+
+ # Process each valid DOCX file
+ for filepath in valid_files:
+ filename = os.path.basename(filepath) # Extract filename for display
+ print("Processing file: {}".format(filename))
+
+ try:
+ # Parse the document with surgery_dates parameter
+ patient_data_dict = parse_docx(filepath, surgery_dates or set())
+
+ # Combine patient data from this file with overall results
+ for patient_id, service_dates in patient_data_dict.items():
+ if patient_id not in combined_patient_data:
+ combined_patient_data[patient_id] = {}
+ combined_patient_data[patient_id].update(service_dates)
+
+ # Print results for this file
+ print("Data from file '{}':".format(filename))
+ pprint.pprint(patient_data_dict)
+ print()
+
+ except Exception as e:
+ print("Error processing file '{}': {}".format(filename, e))
+ MediLink_ConfigLoader.log("Error processing DOCX file '{}': {}".format(filepath, e), level="ERROR")
+ continue # Continue with next file instead of crashing

- # Iterate through filtered files
- for filename in filtered_files:
- filepath = os.path.join(directory, filename)
- # Parse each document and print the resulting dictionary
- patient_data_dict = parse_docx(filepath)
- print("Data from file '{}':".format(filename))
- import pprint
- pprint.pprint(patient_data_dict)
- print()
+ return combined_patient_data


  def main():
  # Call the function with the directory containing your .docx files
  directory = "C:\\Users\\danie\\Downloads\\"
+ # Note: surgery_dates parameter is now optional
  rotate_docx_files(directory)
MediLink/MediLink_API_v3.py
@@ -640,6 +640,13 @@ def submit_uhc_claim(client, x12_request_data):
  it returns a simulated response. If Test Mode is not enabled, it submits the claim and then retrieves
  the claim acknowledgement details using the transaction ID from the initial response.

+ NOTE: This function uses endpoints that may not be available in the new swagger version:
+ - /Claims/api/claim-submission/v1 (claim submission)
+ - /Claims/api/claim-details/v1 (claim acknowledgement)
+
+ If these endpoints are deprecated in the new swagger, this function will need to be updated
+ to use the new available endpoints.
+
  :param client: An instance of APIClient
  :param x12_request_data: The x12 837p data as a string
  :return: The final response containing the claim acknowledgement details or a dummy response if in Test Mode
MediLink/MediLink_Decoder.py
@@ -204,20 +204,30 @@ def display_table(records):
  print("No records to display.")
  return

- # Determine which fields have at least one non-empty value
- used_fields = [field for field in new_fieldnames if any(str(record.get(field, '')).strip() for record in records)]
+ # PERFORMANCE FIX: Single-pass optimization - determine used fields and calculate widths in one pass
+ used_fields = []
+ col_widths = {}
+
+ # First pass: identify used fields and initialize widths
+ for field in new_fieldnames:
+ col_widths[field] = len(field) # Header width
+
+ # Second pass: check for used fields and calculate max widths
+ for record in records:
+ for field in new_fieldnames:
+ value_str = str(record.get(field, ''))
+ if value_str.strip() and field not in used_fields:
+ used_fields.append(field)
+ if field in col_widths:
+ col_widths[field] = max(col_widths[field], len(value_str))
+
+ # Filter col_widths to only used fields
+ col_widths = {field: col_widths[field] for field in used_fields}

  if not used_fields:
  print("No data to display.")
  return

- # Calculate column widths based on the longest item in each used column
- col_widths = {field: len(field) for field in used_fields}
-
- for record in records:
- for field in used_fields:
- col_widths[field] = max(col_widths[field], len(str(record.get(field, ''))))
-
  # Create table header
  header = " | ".join("{:<{}}".format(field, col_widths[field]) for field in used_fields)
  print(header)
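A standalone sketch of the single-pass column sizing used above, with invented records; widths start at the header length, grow to the longest value, and only columns with at least one non-empty value are printed:

    # Sketch: find used columns and their widths in one pass over the records.
    records = [
        {'Patient': 'JANE DOE', 'Status': 'A1', 'Paid': ''},
        {'Patient': 'JOHN ROE', 'Status': '', 'Paid': '125.00'},
    ]
    fieldnames = ['Patient', 'Status', 'Paid']

    used_fields = []
    col_widths = {field: len(field) for field in fieldnames}

    for record in records:
        for field in fieldnames:
            value_str = str(record.get(field, ''))
            if value_str.strip() and field not in used_fields:
                used_fields.append(field)
            col_widths[field] = max(col_widths[field], len(value_str))

    header = " | ".join("{:<{}}".format(f, col_widths[f]) for f in used_fields)
    print(header)
    for record in records:
        print(" | ".join("{:<{}}".format(str(record.get(f, '')), col_widths[f]) for f in used_fields))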
@@ -256,20 +266,30 @@ def display_consolidated_records(records):
  print("No valid records to display after filtering empty rows.")
  return

- # Determine which fields have at least one non-empty value
- used_fields = [field for field in new_fieldnames if any(str(record.get(field, '')).strip() for record in filtered_records)]
+ # PERFORMANCE FIX: Single-pass optimization - determine used fields and calculate widths in one pass
+ used_fields = []
+ col_widths = {}
+
+ # First pass: initialize column widths with header lengths
+ for field in new_fieldnames:
+ col_widths[field] = len(field)
+
+ # Second pass: check for used fields and calculate max widths
+ for record in filtered_records:
+ for field in new_fieldnames:
+ value_str = str(record.get(field, ''))
+ if value_str.strip() and field not in used_fields:
+ used_fields.append(field)
+ if field in col_widths:
+ col_widths[field] = max(col_widths[field], len(value_str))
+
+ # Filter col_widths to only used fields
+ col_widths = {field: col_widths[field] for field in used_fields}

  if not used_fields:
  print("No data to display.")
  return

- # Calculate column widths based on the longest item in each used column
- col_widths = {field: len(field) for field in used_fields}
-
- for record in filtered_records:
- for field in used_fields:
- col_widths[field] = max(col_widths[field], len(str(record.get(field, ''))))
-
  # Print header
  header = " | ".join("{:<{}}".format(field, col_widths[field]) for field in used_fields)
  print(header)
MediLink/MediLink_Deductible.py
@@ -725,33 +725,39 @@ if __name__ == "__main__":
  print(table_header)
  print("-" * len(table_header))

- # Set to keep track of processed patients
- processed_patients = set()
-
- # Loop through each payer_id and patient to call the API, then display the eligibility information
+ # PERFORMANCE FIX: Optimize patient-payer processing to avoid O(P×N) complexity
+ # Instead of nested loops, process each patient once and try payer_ids until success
+ # TODO: We should be able to determine the correct payer_id for each patient ahead of time
+ # by looking up their insurance information from the CSV data or crosswalk mapping.
+ # This would eliminate the need to try multiple payer_ids per patient and make this O(N).
  errors = []
  validation_reports = []
- total_patients = len(patients) * len(payer_ids)
  processed_count = 0

- for payer_id in payer_ids:
- for dob, member_id in patients:
- # Skip if this patient has already been processed
- if (dob, member_id) in processed_patients:
- continue
+ for dob, member_id in patients:
+ processed_count += 1
+ print("Processing patient {}/{}: Member ID {}, DOB {}".format(
+ processed_count, len(patients), member_id, dob))
+
+ # Try each payer_id for this patient until we get a successful response
+ patient_processed = False
+ for payer_id in payer_ids:
  try:
- processed_count += 1
- print("Processing patient {}/{}: Member ID {}, DOB {}".format(
- processed_count, total_patients, member_id, dob))
-
  # Run with validation enabled only in debug mode
  run_validation = DEBUG_MODE
  eligibility_data = get_eligibility_info(client, payer_id, provider_last_name, dob, member_id, npi, run_validation=run_validation)
  if eligibility_data is not None:
- display_eligibility_info(eligibility_data, dob, member_id, output_file) # Display as we get the result
- processed_patients.add((dob, member_id)) # Mark this patient as processed
+ display_eligibility_info(eligibility_data, dob, member_id, output_file)
+ patient_processed = True
+ break # Stop trying other payer_ids for this patient once we get a response
  except Exception as e:
- errors.append((dob, member_id, str(e)))
+ # Continue trying other payer_ids
+ continue
+
+ # If no payer_id worked for this patient, log the error
+ if not patient_processed:
+ error_msg = "No successful payer_id found for patient"
+ errors.append((dob, member_id, error_msg))

  # Display errors if any
  if errors:
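A standalone sketch of the per-patient loop above, which tries each payer ID until one answers; lookup_eligibility is a hypothetical stand-in for the real eligibility call and the IDs are invented:

    # Sketch: first successful payer wins; patients with no match are recorded as errors.
    def lookup_eligibility(payer_id, dob, member_id):
        if payer_id != '00002':
            raise RuntimeError("payer not recognized")
        return {'payer': payer_id, 'member': member_id, 'eligible': True}

    patients = [('1950-01-01', 'ABC123')]
    payer_ids = ['00001', '00002']
    errors = []

    for dob, member_id in patients:
        for payer_id in payer_ids:
            try:
                print(lookup_eligibility(payer_id, dob, member_id))
                break  # stop at the first payer that answers
            except Exception:
                continue  # try the next payer
        else:
            errors.append((dob, member_id, "No successful payer_id found for patient"))

    print(errors)  # empty here because the second payer responded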
MediLink/MediLink_Parser.py
@@ -1,10 +1,14 @@
  # MediLink_Parser.py
-
  import re

+ # Pre-compile regex patterns for better performance
+ _EBT_KEY_VALUE_PATTERN = re.compile(r'([^:]+):\s*(.+?)(?=\s{2,}[^:]+:|$)')
+ _ERA_SEGMENT_PATTERN = re.compile(r'\*')
+ _277_SEGMENT_PATTERN = re.compile(r'\*')
+
  def parse_era_content(content, debug=False):
  extracted_data = []
- normalized_content = content.replace('~\n', '~')
+ normalized_content = content.replace('~\n', '~') # Normalize line endings
  lines = normalized_content.split('~')

  record = {}
@@ -13,25 +17,27 @@ def parse_era_content(content, debug=False):
  is_payer_section = False

  for line in lines:
- segments = line.split('*')
+ segments = _ERA_SEGMENT_PATTERN.split(line)

  if segments[0] == 'TRN' and len(segments) > 2:
- check_eft = segments[2]
+ check_eft = segments[2] # Extract check/EFT number

  if segments[0] == 'N1':
  if segments[1] == 'PR':
- is_payer_section = True
+ is_payer_section = True # Enter payer section
  elif segments[1] == 'PE':
- is_payer_section = False
+ is_payer_section = False # Exit payer section

  if is_payer_section and segments[0] == 'N3' and len(segments) > 1:
- payer_address = segments[1]
+ payer_address = segments[1] # Extract payer address

  if segments[0] == 'CLP' and len(segments) >= 5:
  if record:
+ # Calculate adjustment amount if not explicitly provided
  if adjustment_amount == 0 and (write_off > 0 or patient_responsibility > 0):
  adjustment_amount = write_off + patient_responsibility

+ # Update record with calculated amounts
  record.update({
  'Payer Address': payer_address,
  'Allowed Amount': allowed_amount,
@@ -41,8 +47,10 @@ def parse_era_content(content, debug=False):
  })
  extracted_data.append(record)

+ # Reset counters for next record
  allowed_amount, write_off, patient_responsibility, adjustment_amount = 0, 0, 0, 0

+ # Start new record
  record = {
  'Check EFT': check_eft,
  'Chart Number': segments[1],
@@ -52,19 +60,28 @@ def parse_era_content(content, debug=False):
  }

  elif segments[0] == 'CAS':
- if segments[1] == 'CO':
- write_off += float(segments[3])
- elif segments[1] == 'PR':
- patient_responsibility += float(segments[3])
- elif segments[1] == 'OA':
- adjustment_amount += float(segments[3])
+ try:
+ if segments[1] == 'CO':
+ write_off += float(segments[3]) # Contractual obligation
+ elif segments[1] == 'PR':
+ patient_responsibility += float(segments[3]) # Patient responsibility
+ elif segments[1] == 'OA':
+ adjustment_amount += float(segments[3]) # Other adjustments
+ except (ValueError, IndexError):
+ # Skip malformed CAS segments
+ continue

  elif segments[0] == 'AMT' and segments[1] == 'B6':
- allowed_amount += float(segments[2])
+ try:
+ allowed_amount += float(segments[2]) # Allowed amount
+ except (ValueError, IndexError):
+ # Skip malformed AMT segments
+ continue

  elif segments[0] == 'DTM' and (segments[1] == '232' or segments[1] == '472'):
- record['Date of Service'] = segments[2]
+ record['Date of Service'] = segments[2] # Service date

+ # Process final record
  if record:
  if adjustment_amount == 0 and (write_off > 0 or patient_responsibility > 0):
  adjustment_amount = write_off + patient_responsibility
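A standalone sketch of the tolerant CAS/AMT handling added above, where malformed segments are skipped instead of aborting the parse (the sample segments are invented):

    # Sketch: accumulate adjustment amounts, skipping segments that fail to parse.
    lines = ["CAS*CO*45*12.50", "CAS*PR*1*25.00", "CAS*CO*bad"]

    write_off = 0.0
    patient_responsibility = 0.0
    for line in lines:
        segments = line.split('*')
        if segments[0] != 'CAS':
            continue
        try:
            amount = float(segments[3])
        except (ValueError, IndexError):
            continue  # skip malformed CAS segments
        if segments[1] == 'CO':
            write_off += amount
        elif segments[1] == 'PR':
            patient_responsibility += amount

    print(write_off, patient_responsibility)  # 12.5 25.0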
@@ -87,36 +104,36 @@ def parse_277_content(content, debug=False):
  segments = content.split('~')
  records = []
  current_record = {}
+
  for segment in segments:
- parts = segment.split('*')
+ parts = _277_SEGMENT_PATTERN.split(segment)
  if parts[0] == 'HL':
  if current_record:
- records.append(current_record)
- current_record = {}
+ records.append(current_record) # Save completed record
+ current_record = {} # Start new record
  elif parts[0] == 'NM1':
- if parts[1] == 'QC':
- current_record['Patient'] = parts[3] + ' ' + parts[4]
- elif parts[1] == '41':
- current_record['Clearing House'] = parts[3]
- elif parts[1] == 'PR':
- current_record['Payer'] = parts[3]
- elif parts[0] == 'TRN':
- current_record['Claim #'] = parts[2]
- elif parts[0] == 'STC':
- current_record['Status'] = parts[1]
+ if parts[1] == 'QC' and len(parts) > 4:
+ current_record['Patient'] = ' '.join([parts[3], parts[4]]) # Patient name
+ elif parts[1] == '41' and len(parts) > 3:
+ current_record['Clearing House'] = parts[3] # Clearing house
+ elif parts[1] == 'PR' and len(parts) > 3:
+ current_record['Payer'] = parts[3] # Payer name
+ elif parts[0] == 'TRN' and len(parts) > 2:
+ current_record['Claim #'] = parts[2] # Claim number
+ elif parts[0] == 'STC' and len(parts) > 1:
+ current_record['Status'] = parts[1] # Claim status
  if len(parts) > 4:
- current_record['Paid'] = parts[4]
- elif parts[0] == 'DTP':
+ current_record['Paid'] = parts[4] # Paid amount
+ elif parts[0] == 'DTP' and len(parts) > 3:
  if parts[1] == '472':
- current_record['Serv.'] = parts[3]
+ current_record['Serv.'] = parts[3] # Service date
  elif parts[1] == '050':
- current_record['Proc.'] = parts[3]
- elif parts[0] == 'AMT':
- if parts[1] == 'YU':
- current_record['Charged'] = parts[2]
+ current_record['Proc.'] = parts[3] # Process date
+ elif parts[0] == 'AMT' and parts[1] == 'YU' and len(parts) > 2:
+ current_record['Charged'] = parts[2] # Charged amount

  if current_record:
- records.append(current_record)
+ records.append(current_record) # Add final record

  if debug:
  print("Parsed 277 Content:")
@@ -135,17 +152,22 @@ def parse_dpt_content(content, debug=False):
  extracted_data = []
  lines = content.splitlines()
  record = {}
+
  for line in lines:
  if 'Patient Account Number:' in line:
  if record:
- extracted_data.append(record)
- record = {}
- parts = line.split(':')
- if len(parts) == 2:
- key, value = parts[0].strip(), parts[1].strip()
- record[key] = value
+ extracted_data.append(record) # Save completed record
+ record = {} # Start new record
+
+ # More efficient split - only split on first occurrence
+ colon_pos = line.find(':')
+ if colon_pos != -1:
+ key = line[:colon_pos].strip()
+ value = line[colon_pos + 1:].strip()
+ record[key] = value # Add key-value pair to current record
+
  if record:
- extracted_data.append(record)
+ extracted_data.append(record) # Add final record

  if debug:
  print("Parsed DPT Content:")
@@ -158,10 +180,7 @@ def parse_ebt_content(content, debug=False):
  extracted_data = [] # List to hold all extracted records
  lines = content.splitlines() # Split the content into individual lines
  record = {} # Dictionary to hold the current record being processed
-
- # Regular expression pattern to match key-value pairs in the format "Key: Value"
- key_value_pattern = re.compile(r'([^:]+):\s*(.+?)(?=\s{2,}[^:]+?:|$)')
-
+
  for line in lines:
  # Check for the start of a new record based on the presence of 'Patient Name'
  if 'Patient Name:' in line and record:
@@ -170,7 +189,7 @@ def parse_ebt_content(content, debug=False):
  record = {} # Reset the record for the next entry

  # Find all key-value pairs in the current line
- matches = key_value_pattern.findall(line)
+ matches = _EBT_KEY_VALUE_PATTERN.findall(line)
  for key, value in matches:
  key = key.strip() # Remove leading/trailing whitespace from the key
  value = value.strip() # Remove leading/trailing whitespace from the value
@@ -200,17 +219,22 @@ def parse_ibt_content(content, debug=False):
  extracted_data = []
  lines = content.splitlines()
  record = {}
+
  for line in lines:
  if 'Submitter Batch ID:' in line:
  if record:
- extracted_data.append(record)
- record = {}
- parts = line.split(':')
- if len(parts) == 2:
- key, value = parts[0].strip(), parts[1].strip()
- record[key] = value
+ extracted_data.append(record) # Save completed record
+ record = {} # Start new record
+
+ # More efficient split - only split on first occurrence
+ colon_pos = line.find(':')
+ if colon_pos != -1:
+ key = line[:colon_pos].strip()
+ value = line[colon_pos + 1:].strip()
+ record[key] = value # Add key-value pair to current record
+
  if record:
- extracted_data.append(record)
+ extracted_data.append(record) # Add final record

  if debug:
  print("Parsed IBT Content:")
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: medicafe
- Version: 0.250723.4
+ Version: 0.250724.0
  Summary: MediCafe
  Home-page: https://github.com/katanada2
  Author: Daniel Vidaud
medicafe.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: medicafe
- Version: 0.250723.4
+ Version: 0.250724.0
  Summary: MediCafe
  Home-page: https://github.com/katanada2
  Author: Daniel Vidaud
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

  setup(
  name='medicafe',
- version="0.250723.4",
+ version="0.250724.0",
  description='MediCafe',
  long_description="""
  # Project Overview: MediCafe