PyPI - medicafe - Versions diffs - 0.250728.9__py3-none-any.whl → 0.250805.0__py3-none-any.whl - Mend

medicafe 0.250728.9__py3-none-any.whl → 0.250805.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of medicafe might be problematic. Click here for more details.

Files changed (57)

MediBot/MediBot.bat +233 -19
MediBot/MediBot.py +138 -46
MediBot/MediBot_Crosswalk_Library.py +127 -623
MediBot/MediBot_Crosswalk_Utils.py +618 -0
MediBot/MediBot_Preprocessor.py +72 -17
MediBot/MediBot_Preprocessor_lib.py +470 -76
MediBot/MediBot_UI.py +32 -17
MediBot/MediBot_dataformat_library.py +68 -20
MediBot/MediBot_docx_decoder.py +120 -19
MediBot/MediBot_smart_import.py +180 -0
MediBot/__init__.py +89 -0
MediBot/get_medicafe_version.py +25 -0
MediBot/update_json.py +35 -6
MediBot/update_medicafe.py +19 -1
MediCafe/MediLink_ConfigLoader.py +160 -0
MediCafe/__init__.py +171 -0
MediCafe/__main__.py +222 -0
MediCafe/api_core.py +1098 -0
MediCafe/api_core_backup.py +427 -0
MediCafe/api_factory.py +306 -0
MediCafe/api_utils.py +356 -0
MediCafe/core_utils.py +450 -0
MediCafe/graphql_utils.py +445 -0
MediCafe/logging_config.py +123 -0
MediCafe/logging_demo.py +61 -0
MediCafe/migration_helpers.py +463 -0
MediCafe/smart_import.py +436 -0
MediLink/MediLink_837p_cob_library.py +28 -28
MediLink/MediLink_837p_encoder.py +33 -34
MediLink/MediLink_837p_encoder_library.py +226 -150
MediLink/MediLink_837p_utilities.py +129 -5
MediLink/MediLink_API_Generator.py +83 -60
MediLink/MediLink_API_v3.py +1 -1
MediLink/MediLink_ClaimStatus.py +177 -31
MediLink/MediLink_DataMgmt.py +378 -63
MediLink/MediLink_Decoder.py +20 -1
MediLink/MediLink_Deductible.py +155 -28
MediLink/MediLink_Display_Utils.py +72 -0
MediLink/MediLink_Down.py +127 -5
MediLink/MediLink_Gmail.py +712 -653
MediLink/MediLink_PatientProcessor.py +257 -0
MediLink/MediLink_UI.py +85 -71
MediLink/MediLink_Up.py +28 -4
MediLink/MediLink_insurance_utils.py +227 -230
MediLink/MediLink_main.py +248 -0
MediLink/MediLink_smart_import.py +264 -0
MediLink/__init__.py +93 -1
MediLink/insurance_type_integration_test.py +13 -3
MediLink/test.py +1 -1
MediLink/test_timing.py +59 -0
{medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/METADATA +1 -1
medicafe-0.250805.0.dist-info/RECORD +81 -0
medicafe-0.250805.0.dist-info/entry_points.txt +2 -0
{medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/top_level.txt +1 -0
medicafe-0.250728.9.dist-info/RECORD +0 -59
{medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/LICENSE +0 -0
{medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/WHEEL +0 -0

MediBot/MediBot_Preprocessor_lib.py CHANGED

@@ -1,8 +1,44 @@
-#MediBot_Preprocessor_lib.py
-from collections import OrderedDict, defaultdict
+# MediBot_Preprocessor_lib.py
+"""
+Core preprocessing library for MediBot
+Contains core preprocessing functions and utilities.
+"""
+import csv, time, os, sys
 from datetime import datetime, timedelta
-import os, csv, sys, time
-import chardet  # Ensure chardet is imported
+from collections import OrderedDict
+# Try to import chardet for encoding detection
+try:
+    import chardet
+except ImportError:
+    chardet = None  # Fallback if chardet is not available
+# Use core utilities for standardized imports
+from MediCafe.core_utils import (
+    import_medibot_module,
+    import_medilink_module,
+    get_config_loader_with_fallback
+)
+# Initialize configuration loader with fallback
+MediLink_ConfigLoader = get_config_loader_with_fallback()
+# Import MediLink_DataMgmt using centralized import function
+MediLink_DataMgmt = import_medilink_module('MediLink_DataMgmt')
+# Import MediBot modules using centralized import functions
+MediBot_UI = import_medibot_module('MediBot_UI')
+if MediBot_UI:
+    app_control = getattr(MediBot_UI, 'app_control', None)
+else:
+    app_control = None
+MediBot_docx_decoder = import_medibot_module('MediBot_docx_decoder')
+if MediBot_docx_decoder:
+    parse_docx = getattr(MediBot_docx_decoder, 'parse_docx', None)
+else:
+    parse_docx = None
 # Add the parent directory of the project to the Python path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -11,22 +47,25 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 _config_cache = None
 _crosswalk_cache = None
-# Attempt to import necessary modules, falling back if they are not found
-try:
-    import MediLink_ConfigLoader
-    import MediLink_DataMgmt
-except ImportError:
-    from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
+# Use core utilities for standardized imports
+from MediCafe.core_utils import get_shared_config_loader
+MediLink_ConfigLoader = get_shared_config_loader()
+# Ensure MediLink_ConfigLoader is available
+if MediLink_ConfigLoader is None:
+    print("Warning: MediLink_ConfigLoader not available. Some functionality may be limited.")
+    # Create a minimal fallback logger
+    class FallbackLogger:
+        def log(self, message, level="INFO"):
+            print("[{}] {}".format(level, message))
+    MediLink_ConfigLoader = FallbackLogger()
+# Import centralized logging configuration
 try:
-    from MediBot_UI import app_control
-    from MediBot_docx_decoder import parse_docx
+    from MediCafe.logging_config import PERFORMANCE_LOGGING
 except ImportError:
-    from MediBot import MediBot_UI
-    app_control = MediBot_UI.app_control
-    from MediBot import MediBot_docx_decoder
-    parse_docx = MediBot_docx_decoder.parse_docx
+    # Fallback to local flag if centralized config is not available
+    PERFORMANCE_LOGGING = False
 class InitializationError(Exception):
     def __init__(self, message):
         self.message = message
@@ -109,9 +148,14 @@ def load_csv_data(csv_file_path):
         # Detect the file encoding
         with open(csv_file_path, 'rb') as f:
             raw_data = f.read()
-            result = chardet.detect(raw_data)
-            encoding = result['encoding']
-            confidence = result['confidence']
+            if chardet:
+                result = chardet.detect(raw_data)
+                encoding = result['encoding']
+                confidence = result['confidence']
+            else:
+                # Fallback to UTF-8 when chardet is not available
+                encoding = 'utf-8'
+                confidence = 1.0
             print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))
         # Read the CSV file with the detected encoding
@@ -171,6 +215,49 @@ def filter_rows(csv_data):
     excluded_insurance = {'AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'}
     csv_data[:] = [row for row in csv_data if row.get('Patient ID') and row.get('Primary Insurance') not in excluded_insurance]
+def detect_date_format(date_str):
+    """
+    PERFORMANCE OPTIMIZATION: Quickly detect the most likely date format
+    to avoid trying all formats for every date string.
+    Parameters:
+    - date_str (str): The date string to analyze
+    Returns:
+    - str: The most likely format string, or None if unclear
+    """
+    if not date_str:
+        return None
+    # Remove time components if present
+    date_only = date_str.split()[0]
+    # Count separators to guess format
+    slash_count = date_only.count('/')
+    dash_count = date_only.count('-')
+    # Check for 4-digit year (likely YYYY format)
+    if len(date_only) >= 10:  # YYYY-MM-DD or YYYY/MM/DD
+        if dash_count == 2:
+            return '%Y-%m-%d'
+        elif slash_count == 2:
+            return '%Y/%m/%d'
+    # Check for 2-digit year (likely MM/DD/YY or MM-DD-YY)
+    if len(date_only) >= 8:  # MM/DD/YY or MM-DD-YY
+        if dash_count == 2:
+            return '%m-%d-%y'
+        elif slash_count == 2:
+            return '%m/%d/%y'
+    # Default to most common format (MM/DD/YYYY)
+    if dash_count == 2:
+        return '%m-%d-%Y'
+    elif slash_count == 2:
+        return '%m/%d/%Y'
+    return None
 def clean_surgery_date_string(date_str):
     """
     Cleans and normalizes surgery date strings to handle damaged data.
@@ -193,26 +280,68 @@ def clean_surgery_date_string(date_str):
     date_str = date_str.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
     date_str = ' '.join(date_str.split())  # Normalize whitespace
-    # Handle common date format variations
-    date_formats = [
-        '%m/%d/%Y',    # 12/25/2023
-        '%m-%d-%Y',    # 12-25-2023
-        '%m/%d/%y',    # 12/25/23
-        '%m-%d-%y',    # 12-25-23
-        '%Y/%m/%d',    # 2023/12/25
-        '%Y-%m-%d',    # 2023-12-25
-        '%m/%d/%Y %H:%M:%S',  # 12/25/2023 14:30:00
-        '%m-%d-%Y %H:%M:%S',  # 12-25-2023 14:30:00
-    ]
-    # Try to parse with different formats
-    for fmt in date_formats:
+    # PERFORMANCE OPTIMIZATION: Try detected format first
+    detected_format = detect_date_format(date_str)
+    if detected_format:
         try:
-            parsed_date = datetime.strptime(date_str, fmt)
-            # Return in standard MM/DD/YYYY format
+            parsed_date = datetime.strptime(date_str, detected_format)
             return parsed_date.strftime('%m/%d/%Y')
         except ValueError:
-            continue
+            pass
+    # PERFORMANCE OPTIMIZATION: Try most common format first (MM/DD/YYYY)
+    # This reduces the average number of format attempts from 8 to ~1-2
+    try:
+        parsed_date = datetime.strptime(date_str, '%m/%d/%Y')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    # PERFORMANCE OPTIMIZATION: Try second most common format (MM-DD-YYYY)
+    try:
+        parsed_date = datetime.strptime(date_str, '%m-%d-%Y')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    # PERFORMANCE OPTIMIZATION: Try 2-digit year formats only if needed
+    try:
+        parsed_date = datetime.strptime(date_str, '%m/%d/%y')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    try:
+        parsed_date = datetime.strptime(date_str, '%m-%d-%y')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    # PERFORMANCE OPTIMIZATION: Try YYYY formats only if needed
+    try:
+        parsed_date = datetime.strptime(date_str, '%Y/%m/%d')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    try:
+        parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    # PERFORMANCE OPTIMIZATION: Try datetime formats only if needed
+    try:
+        parsed_date = datetime.strptime(date_str, '%m/%d/%Y %H:%M:%S')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
+    try:
+        parsed_date = datetime.strptime(date_str, '%m-%d-%Y %H:%M:%S')
+        return parsed_date.strftime('%m/%d/%Y')
+    except ValueError:
+        pass
     # If no format matches, try to extract date components
     try:
@@ -248,31 +377,89 @@ def convert_surgery_date(csv_data):
     Parameters:
     - csv_data (list): List of dictionaries containing CSV row data
     """
-    for row in csv_data:
+    # TIMING: Start surgery date conversion with granular tracking
+    total_start_time = time.time()
+    date_cleaning_time = 0
+    date_parsing_time = 0
+    processed_count = 0
+    empty_count = 0
+    error_count = 0
+    print("Starting surgery date conversion for {} rows...".format(len(csv_data)))
+    # LOGGING STRATEGY: Only log start/end of looped events at INFO level, not individual successes
+    # MediLink_ConfigLoader.log("Starting surgery date conversion for {} rows...".format(len(csv_data)), level="INFO")  # REMOVED
+    # PERFORMANCE OPTIMIZATION: Pre-compile datetime.strptime for the most common format
+    # This avoids repeated format string parsing
+    standard_format = '%m/%d/%Y'
+    for row_idx, row in enumerate(csv_data, 1):
         surgery_date_str = row.get('Surgery Date', '')
         if not surgery_date_str:
-            MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
+            empty_count += 1
+            # LOGGING STRATEGY: Only log actual errors/failures, not routine empty dates
+            # if empty_count <= 5:  # Only log first 5 empty dates
+            #     MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
+            #     print("Surgery Date not found for row: {}".format(row))
             row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if empty
-            print("Surgery Date not found for row: {}".format(row))
         else:
+            # TIMING: Start date string cleaning
+            cleaning_start = time.time()
             # Clean the date string first
             cleaned_date_str = clean_surgery_date_string(surgery_date_str)
+            # TIMING: End date string cleaning
+            cleaning_end = time.time()
+            date_cleaning_time += (cleaning_end - cleaning_start)
             if not cleaned_date_str:
-                MediLink_ConfigLoader.log("Error: Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row), level="ERROR")
+                error_count += 1
+                # LOGGING STRATEGY: Log actual errors (cleaning failures) at INFO level
+                if error_count <= 5:  # Only log first 5 errors
+                    MediLink_ConfigLoader.log("Error: Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row), level="INFO")
+                    print("Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row))
                 row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if cleaning fails
-                print("Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row))
             else:
+                # TIMING: Start date parsing
+                parsing_start = time.time()
                 try:
+                    # PERFORMANCE OPTIMIZATION: Use pre-compiled format string
                     # Parse the cleaned date string
-                    row['Surgery Date'] = datetime.strptime(cleaned_date_str, '%m/%d/%Y')
-                    MediLink_ConfigLoader.log("Successfully cleaned and parsed Surgery Date '{}' -> '{}' for row: {}".format(
-                        surgery_date_str, cleaned_date_str, row), level="DEBUG")
+                    row['Surgery Date'] = datetime.strptime(cleaned_date_str, standard_format)
+                    processed_count += 1
+                    # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
+                    # if processed_count <= 10 or processed_count % 100 == 0:  # Log first 10 and every 100th
+                    #     MediLink_ConfigLoader.log("Successfully cleaned and parsed Surgery Date '{}' -> '{}' for row: {}".format(
+                    #         surgery_date_str, cleaned_date_str, row), level="DEBUG")
                 except ValueError as e:
-                    MediLink_ConfigLoader.log("Error parsing cleaned Surgery Date '{}': {} for row: {}".format(
-                        cleaned_date_str, e, row), level="ERROR")
+                    error_count += 1
+                    # LOGGING STRATEGY: Log actual errors (parsing failures) at INFO level
+                    if error_count <= 5:  # Only log first 5 parsing errors
+                        MediLink_ConfigLoader.log("Error parsing cleaned Surgery Date '{}': {} for row: {}".format(
+                            cleaned_date_str, e, row), level="INFO")
                     row['Surgery Date'] = datetime.min  # Assign a minimum datetime value if parsing fails
+                # TIMING: End date parsing
+                parsing_end = time.time()
+                date_parsing_time += (parsing_end - parsing_start)
+    # TIMING: End total surgery date conversion
+    total_end_time = time.time()
+    total_duration = total_end_time - total_start_time
+    if PERFORMANCE_LOGGING:
+        print("Surgery date conversion completed:")
+        print("  - Total duration: {:.2f} seconds".format(total_duration))
+        print("  - Date cleaning time: {:.2f} seconds ({:.1f}%)".format(date_cleaning_time, (date_cleaning_time/total_duration)*100))
+        print("  - Date parsing time: {:.2f} seconds ({:.1f}%)".format(date_parsing_time, (date_parsing_time/total_duration)*100))
+        print("  - Processed: {} rows, Empty: {} rows, Errors: {} rows".format(processed_count, empty_count, error_count))
+    # LOGGING STRATEGY: Log completion summary at INFO level (end of looped event)
+    MediLink_ConfigLoader.log("Surgery date conversion completed - Total: {:.2f}s, Cleaning: {:.2f}s, Parsing: {:.2f}s, Processed: {}, Empty: {}, Errors: {}".format(
+        total_duration, date_cleaning_time, date_parsing_time, processed_count, empty_count, error_count), level="INFO")
 def sort_and_deduplicate(csv_data):
     # Create a dictionary to hold unique patients based on Patient ID
@@ -479,13 +666,31 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
                 row['Ins1 Insurance ID'] = None
 def update_insurance_ids(csv_data, config, crosswalk):
-    MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")
+    # LOGGING STRATEGY: Remove DEBUG level function start log - DEBUG is typically silent anyway
+    # MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")
+    # TIMING: Start insurance ID updates with granular tracking
+    total_start_time = time.time()
+    lookup_build_time = 0
+    csv_processing_time = 0
+    processed_count = 0
+    medicare_count = 0
+    regular_count = 0
+    placeholder_count = 0
+    print("Starting insurance ID updates for {} rows...".format(len(csv_data)))
+    # LOGGING STRATEGY: Only log start/end of looped events at INFO level, not individual successes
+    # MediLink_ConfigLoader.log("Starting insurance ID updates for {} rows...".format(len(csv_data)), level="INFO")  # REMOVED
+    # TIMING: Start lookup dictionary building
+    lookup_start_time = time.time()
     # PERFORMANCE FIX: Pre-build optimized lookup dictionaries for both regular and Medicare IDs
     # This reduces Medicare processing overhead by building lookups once instead of repeated processing
     payer_id_to_medisoft = {}
     payer_id_to_medicare = {}
-    MediLink_ConfigLoader.log("Initialized optimized lookup dictionaries for Medicare and regular IDs.", level="DEBUG")
+    # LOGGING STRATEGY: Remove DEBUG level initialization log - DEBUG is typically silent anyway
+    # MediLink_ConfigLoader.log("Initialized optimized lookup dictionaries for Medicare and regular IDs.", level="DEBUG")
     # Build both lookup dictionaries simultaneously to avoid multiple iterations
     for payer_id, details in crosswalk.get('payer_id', {}).items():
@@ -501,14 +706,28 @@ def update_insurance_ids(csv_data, config, crosswalk):
         payer_id_to_medisoft[payer_id] = int(medisoft_ids[0]) if medisoft_ids else None
         payer_id_to_medicare[payer_id] = int(medicare_ids[0]) if medicare_ids else None
-        MediLink_ConfigLoader.log("Processed Payer ID '{}': Regular IDs: {}, Medicare IDs: {}".format(
-            payer_id, medisoft_ids, medicare_ids), level="DEBUG")
+        # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
+        # if len(payer_id_to_medisoft) <= 10 or len(payer_id_to_medisoft) % 50 == 0:  # Log first 10 and every 50th
+        #     MediLink_ConfigLoader.log("Processed Payer ID '{}': Regular IDs: {}, Medicare IDs: {}".format(
+        #         payer_id, medisoft_ids, medicare_ids), level="DEBUG")
+    # TIMING: End lookup dictionary building
+    lookup_end_time = time.time()
+    lookup_build_time = lookup_end_time - lookup_start_time
+    if PERFORMANCE_LOGGING:
+        print("Built lookup dictionaries in {:.2f} seconds for {} payer IDs".format(lookup_build_time, len(payer_id_to_medisoft)))
+    # TIMING: Start CSV processing
+    csv_start_time = time.time()
     # PERFORMANCE FIX: Single pass through CSV data with optimized Medicare ID resolution
     for row_idx, row in enumerate(csv_data, 1):
         ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
-        # PERFORMANCE FIX: Use enumerate index instead of csv_data.index() which is O(n)
-        MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_idx, ins1_payer_id), level="DEBUG")
+        # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
+        # if row_idx <= 10 or row_idx % 100 == 0:  # Log first 10 and every 100th
+        #     MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_idx, ins1_payer_id), level="DEBUG")
         # Try Medicare ID first, then fall back to regular ID (optimized Medicare processing)
         insurance_id = (payer_id_to_medicare.get(ins1_payer_id) or
@@ -523,11 +742,41 @@ def update_insurance_ids(csv_data, config, crosswalk):
                 'medisoft_medicare_id': [],  # Placeholder for future Medicare IDs
                 'endpoint': None  # Placeholder for future endpoint
             }
-            MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")
+            placeholder_count += 1
+            # LOGGING STRATEGY: Log actual events (new payer IDs) at INFO level
+            if placeholder_count <= 5:  # Only log first 5 placeholders
+                MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")
+        elif insurance_id == payer_id_to_medicare.get(ins1_payer_id):
+            medicare_count += 1
+        else:
+            regular_count += 1
         # Assign the resolved insurance ID to the row
         row['Ins1 Insurance ID'] = insurance_id
-        MediLink_ConfigLoader.log("Assigned Insurance ID '{}' to row with Ins1 Payer ID '{}'.".format(insurance_id, ins1_payer_id), level="DEBUG")
+        processed_count += 1
+        # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
+        # if processed_count <= 10 or processed_count % 100 == 0:  # Log first 10 and every 100th
+        #     MediLink_ConfigLoader.log("Assigned Insurance ID '{}' to row with Ins1 Payer ID '{}'.".format(insurance_id, ins1_payer_id), level="DEBUG")
+    # TIMING: End CSV processing
+    csv_end_time = time.time()
+    csv_processing_time = csv_end_time - csv_start_time
+    # TIMING: End total insurance ID updates
+    total_end_time = time.time()
+    total_duration = total_end_time - total_start_time
+    if PERFORMANCE_LOGGING:
+        print("Insurance ID updates completed:")
+        print("  - Total duration: {:.2f} seconds".format(total_duration))
+        print("  - Lookup building time: {:.2f} seconds ({:.1f}%)".format(lookup_build_time, (lookup_build_time/total_duration)*100))
+        print("  - CSV processing time: {:.2f} seconds ({:.1f}%)".format(csv_processing_time, (csv_processing_time/total_duration)*100))
+    print("  - Processed: {} rows, Medicare: {} rows, Regular: {} rows, Placeholders: {} rows".format(
+        processed_count, medicare_count, regular_count, placeholder_count))
+    # LOGGING STRATEGY: Log completion summary at INFO level (end of looped event)
+    MediLink_ConfigLoader.log("Insurance ID updates completed - Total: {:.2f}s, Lookup: {:.2f}s, Processing: {:.2f}s, Processed: {}, Medicare: {}, Regular: {}, Placeholders: {}".format(
+        total_duration, lookup_build_time, csv_processing_time, processed_count, medicare_count, regular_count, placeholder_count), level="INFO")
 def update_procedure_codes(csv_data, crosswalk):
@@ -591,6 +840,11 @@ def update_procedure_codes(csv_data, crosswalk):
 def update_diagnosis_codes(csv_data):
     try:
+        # TIMING: Start surgery schedule parsing timing
+        parsing_start_time = time.time()
+        print("Starting surgery schedule parsing at: {}".format(time.strftime("%H:%M:%S")))
+        MediLink_ConfigLoader.log("Starting surgery schedule parsing at: {}".format(time.strftime("%H:%M:%S")), level="INFO")
         # Use cached configuration instead of loading repeatedly
         config, crosswalk = get_cached_configuration()
@@ -613,7 +867,7 @@ def update_diagnosis_codes(csv_data):
         min_surgery_date = min(surgery_dates)
         max_surgery_date = max(surgery_dates)
-        # Apply a ±8-day margin to the surgery dates... Increased from 5 days.
+        # Apply a +/-8-day margin to the surgery dates... Increased from 5 days.
         margin = timedelta(days=8)
         threshold_start = min_surgery_date - margin
         threshold_end = max_surgery_date + margin
@@ -625,6 +879,9 @@ def update_diagnosis_codes(csv_data):
         MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")
+        # TIMING: Start file system operations
+        filesystem_start_time = time.time()
         # PERFORMANCE OPTIMIZATION: Batch file system operations with caching
         # Pre-convert threshold timestamps for efficient comparison (Windows XP compatible)
         threshold_start_ts = threshold_start.timestamp() if hasattr(threshold_start, 'timestamp') else time.mktime(threshold_start.timetuple())
@@ -632,26 +889,33 @@ def update_diagnosis_codes(csv_data):
         valid_files = []
         try:
-            # Use os.scandir() with optimized timestamp comparison (XP/3.4.4 compatible)
-            with os.scandir(local_storage_path) as entries:
-                for entry in entries:
-                    if entry.name.endswith('.docx'):
-                        # Get file modification time in single operation
-                        try:
-                            stat_info = entry.stat()
-                            # Direct timestamp comparison avoids datetime conversion overhead
-                            if threshold_start_ts <= stat_info.st_mtime <= threshold_end_ts:
-                                valid_files.append(entry.path)
-                        except (OSError, ValueError):
-                            # Skip files with invalid modification times
-                            continue
+            # Use os.listdir() with optimized timestamp comparison (XP/3.4.4 compatible)
+            for filename in os.listdir(local_storage_path):
+                if filename.endswith('.docx'):
+                    filepath = os.path.join(local_storage_path, filename)
+                    # Get file modification time in single operation
+                    try:
+                        stat_info = os.stat(filepath)
+                        # Direct timestamp comparison avoids datetime conversion overhead
+                        if threshold_start_ts <= stat_info.st_mtime <= threshold_end_ts:
+                            valid_files.append(filepath)
+                    except (OSError, ValueError):
+                        # Skip files with invalid modification times
+                        continue
         except OSError:
             MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
             return
+        # TIMING: End file system operations
+        filesystem_end_time = time.time()
+        filesystem_duration = filesystem_end_time - filesystem_start_time
         # PERFORMANCE OPTIMIZATION: Log file count for debugging without processing overhead
         MediLink_ConfigLoader.log("Found {} DOCX files within date threshold".format(len(valid_files)), level="INFO")
+        # TIMING: Start CSV data preprocessing
+        csv_prep_start_time = time.time()
         # PERFORMANCE OPTIMIZATION: Pre-process patient IDs for efficient lookup
         # Create a set of patient IDs from CSV data for faster lookups
         patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
@@ -666,20 +930,81 @@ def update_diagnosis_codes(csv_data):
                 surgery_date_strings[patient_id] = surgery_date.strftime("%m-%d-%Y")
             else:
                 surgery_date_strings[patient_id] = ''
+        # TIMING: End CSV data preprocessing
+        csv_prep_end_time = time.time()
+        csv_prep_duration = csv_prep_end_time - csv_prep_start_time
+        # TIMING: Log before processing DOCX files
+        docx_processing_start_time = time.time()
+        print("Found {} DOCX files to process. Starting DOCX parsing...".format(len(valid_files)))
+        MediLink_ConfigLoader.log("Found {} DOCX files to process. Starting DOCX parsing...".format(len(valid_files)), level="INFO")
+        # TIMING: Track individual DOCX file processing
+        docx_files_processed = 0
+        docx_files_skipped = 0
+        docx_parse_errors = 0
         # Process valid DOCX files
         for filepath in valid_files:
-            MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
+            # TIMING: Start individual file processing
+            file_start_time = time.time()
             try:
                 patient_data = parse_docx(filepath, surgery_dates)  # Pass surgery_dates to parse_docx
+                docx_files_processed += 1
                 # PERFORMANCE OPTIMIZATION: Use defaultdict for more efficient dictionary operations
                 for patient_id, service_dates in patient_data.items():
                     if patient_id not in all_patient_data:
                         all_patient_data[patient_id] = {}
                     for date_of_service, diagnosis_data in service_dates.items():
+                        # TODO: SURGERY SCHEDULE CONFLICT RESOLUTION
+                        # Implement enhanced conflict detection and logging as outlined in
+                        # surgery_schedule_conflict_resolution_strategy.md
+                        #
+                        # Current behavior: Silent overwriting with latest file wins
+                        # Proposed enhancement:
+                        # 1. Detect when multiple files contain data for same date
+                        # 2. Log conflicts with date-organized notifications showing:
+                        #    - Source files (with modification timestamps)
+                        #    - Patients affected (added/removed/modified)
+                        #    - Specific changes (diagnosis, laterality, etc.)
+                        # 3. Use file modification time to determine priority
+                        # 4. Generate summary report organized by surgery date
+                        #
+                        # Example notification format:
+                        # "SURGERY SCHEDULE CONFLICTS DETECTED FOR: 12/15/2023"
+                        # "  Original: file1.docx (modified: 08:30:00)"
+                        # "  Revised: file2.docx (modified: 14:45:00)"
+                        # "  Patients affected: 3 modified, 1 added, 1 removed"
+                        # "  Resolution: Using latest file (file2.docx)"
+                        #
+                        # This will provide transparency when revised schedules overwrite
+                        # original schedules, organized by the affected surgery dates.
                         all_patient_data[patient_id][date_of_service] = diagnosis_data
             except Exception as e:
+                docx_parse_errors += 1
                 MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")
+            # TIMING: End individual file processing
+            file_end_time = time.time()
+            file_duration = file_end_time - file_start_time
+            # Log slow files (taking more than 1 second)
+            if file_duration > 1.0 and PERFORMANCE_LOGGING:
+                print("  - Slow file: {} (Duration: {:.2f} seconds)".format(os.path.basename(filepath), file_duration))
+        # TIMING: Log DOCX processing completion
+        docx_processing_end_time = time.time()
+        docx_processing_duration = docx_processing_end_time - docx_processing_start_time
+        if PERFORMANCE_LOGGING:
+            print("DOCX parsing completed at: {} (Duration: {:.2f} seconds)".format(
+                time.strftime("%H:%M:%S"), docx_processing_duration))
+            print("  - Files processed: {}, Files skipped: {}, Parse errors: {}".format(
+                docx_files_processed, docx_files_skipped, docx_parse_errors))
+        MediLink_ConfigLoader.log("DOCX parsing completed at: {} (Duration: {:.2f} seconds)".format(
+            time.strftime("%H:%M:%S"), docx_processing_duration), level="INFO")
         # Log if no valid files were found
         if not valid_files:
@@ -693,6 +1018,9 @@ def update_diagnosis_codes(csv_data):
             MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
             return  # Exit the function early if no data is available
+        # TIMING: Start CSV data matching
+        csv_matching_start_time = time.time()
         # Get Medisoft shorthand dictionary from crosswalk.
         diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})
@@ -736,9 +1064,27 @@ def update_diagnosis_codes(csv_data):
             else:
                 MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")
+        # TIMING: End CSV data matching
+        csv_matching_end_time = time.time()
+        csv_matching_duration = csv_matching_end_time - csv_matching_start_time
         # Log total count of updated rows
         MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")
+        # TIMING: End surgery schedule parsing timing
+        parsing_end_time = time.time()
+        parsing_duration = parsing_end_time - parsing_start_time
+        if PERFORMANCE_LOGGING:
+            print("Surgery schedule parsing completed at: {} (Duration: {:.2f} seconds)".format(
+                time.strftime("%H:%M:%S"), parsing_duration))
+            print("  - File system operations: {:.2f} seconds ({:.1f}%)".format(filesystem_duration, (filesystem_duration/parsing_duration)*100))
+            print("  - CSV data preprocessing: {:.2f} seconds ({:.1f}%)".format(csv_prep_duration, (csv_prep_duration/parsing_duration)*100))
+            print("  - DOCX file processing: {:.2f} seconds ({:.1f}%)".format(docx_processing_duration, (docx_processing_duration/parsing_duration)*100))
+            print("  - CSV data matching: {:.2f} seconds ({:.1f}%)".format(csv_matching_duration, (csv_matching_duration/parsing_duration)*100))
+            print("  - Files processed: {}, Files skipped: {}, Parse errors: {}".format(docx_files_processed, docx_files_skipped, docx_parse_errors))
+        MediLink_ConfigLoader.log("Surgery schedule parsing completed at: {} (Duration: {:.2f} seconds)".format(
+            time.strftime("%H:%M:%S"), parsing_duration), level="INFO")
     except Exception as e:
         message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
         MediLink_ConfigLoader.log(message, level="ERROR")
@@ -802,11 +1148,59 @@ def load_insurance_data_from_mains(config):
     # Initialize the dictionary to hold the insurance to insurance ID mappings
     insurance_to_id = {}
-    # Read data from MAINS using a provided function to handle fixed-width data
-    for record, line_number in MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices):
-        insurance_name = record['MAINSNAME']
-        # Assuming line_number gives the correct insurance ID without needing adjustment
-        insurance_to_id[insurance_name] = line_number
+    try:
+        # Check if MAINS file exists before attempting to read
+        if not os.path.exists(mains_path):
+            error_msg = "CRITICAL: MAINS file not found at: {}. This file is required for insurance name to Medisoft ID mapping.".format(mains_path)
+            MediLink_ConfigLoader.log(error_msg, level="CRITICAL")
+            print("\n" + "="*80)
+            print("CRITICAL ERROR: MAINS FILE MISSING")
+            print("="*80)
+            print("\nThe MAINS file is required for the following critical functions:")
+            print("* Mapping insurance company names to Medisoft IDs")
+            print("* Converting insurance names to payer IDs for claim submission")
+            print("* Creating properly formatted 837p claim files")
+            print("\nWithout this file, claim submission will fail because:")
+            print("* Insurance names cannot be converted to payer IDs")
+            print("* 837p claim files cannot be generated")
+            print("* Claims cannot be submitted to insurance companies")
+            print("\nTO FIX THIS:")
+            print("1. Ensure the MAINS file exists at: {}".format(mains_path))
+            print("2. If the file is missing, llamar a Dani")
+            print("3. The file should contain insurance company data from your Medisoft system")
+            print("="*80)
+            return insurance_to_id
+        # Read data from MAINS using a provided function to handle fixed-width data
+        for record, line_number in MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices):
+            insurance_name = record['MAINSNAME']
+            # Assuming line_number gives the correct insurance ID without needing adjustment
+            insurance_to_id[insurance_name] = line_number
+        MediLink_ConfigLoader.log("Successfully loaded {} insurance records from MAINS".format(len(insurance_to_id)), level="INFO")
+    except FileNotFoundError:
+        error_msg = "CRITICAL: MAINS file not found: {}. This file is required for insurance name to Medisoft ID mapping.".format(mains_path)
+        MediLink_ConfigLoader.log(error_msg, level="CRITICAL")
+        print("\n" + "="*80)
+        print("CRITICAL ERROR: MAINS FILE MISSING")
+        print("="*80)
+        print("\nThe MAINS file is required for the following critical functions:")
+        print("* Mapping insurance company names to Medisoft IDs")
+        print("* Converting insurance names to payer IDs for claim submission")
+        print("* Creating properly formatted 837p claim files")
+        print("\nWithout this file, claim submission will fail because:")
+        print("* Insurance names cannot be converted to payer IDs")
+        print("* 837p claim files cannot be generated")
+        print("* Claims cannot be submitted to insurance companies")
+        print("\nTO FIX THIS:")
+        print("1. Ensure the MAINS file exists at: {}".format(mains_path))
+        print("2. If the file is missing, llamar a Dani")
+        print("3. The file should contain insurance company data from your Medisoft system")
+        print("="*80)
+    except Exception as e:
+        MediLink_ConfigLoader.log("Error loading MAINS data: {}. Continuing without MAINS data.".format(str(e)), level="ERROR")
+        print("Error loading MAINS data: {}. Continuing without MAINS data.".format(str(e)))
     return insurance_to_id
@@ -852,7 +1246,7 @@ def parse_z_dat(z_dat_path, config): # Why is this in MediBot and not MediLink?
     try:
         # Reading blocks of fixed-width data (up to 5 lines per record)
         for personal_info, insurance_info, service_info, service_info_2, service_info_3 in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
-            # Parsing the data using slice definitions from the config
+            # Parse Z.dat reserved record format: 3 active + 2 reserved lines
             parsed_data = MediLink_DataMgmt.parse_fixed_width_data(personal_info, insurance_info, service_info, service_info_2, service_info_3, config.get('MediLink_Config', config))
             # Extract Patient ID and Insurance Name from parsed data

medicafe 0.250728.9__py3-none-any.whl → 0.250805.0__py3-none-any.whl

Potentially problematic release.

medicafe 0.250728.9__py3-none-any.whl → 0.250805.0__py3-none-any.whl