PyPI - medicafe - Versions diffs - 0.240809.0__py3-none-any.whl → 0.241015.0__py3-none-any.whl - Mend

medicafe 0.240809.0__py3-none-any.whl → 0.241015.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of medicafe might be problematic. Click here for more details.

Files changed (32)

MediBot/MediBot.bat +73 -16
MediBot/MediBot.py +90 -79
MediBot/MediBot_Crosswalk_Library.py +496 -194
MediBot/MediBot_Preprocessor.py +22 -14
MediBot/MediBot_Preprocessor_lib.py +299 -153
MediBot/MediBot_UI.py +25 -24
MediBot/MediBot_dataformat_library.py +17 -25
MediBot/MediBot_docx_decoder.py +267 -110
MediBot/update_json.py +26 -1
MediBot/update_medicafe.py +134 -44
MediLink/MediLink.py +93 -51
MediLink/MediLink_837p_encoder.py +23 -23
MediLink/MediLink_837p_encoder_library.py +141 -96
MediLink/MediLink_API_Generator.py +1 -7
MediLink/MediLink_API_v3.py +241 -59
MediLink/MediLink_APIs.py +1 -2
MediLink/MediLink_ClaimStatus.py +21 -6
MediLink/MediLink_ConfigLoader.py +8 -8
MediLink/MediLink_DataMgmt.py +321 -100
MediLink/MediLink_Decoder.py +249 -87
MediLink/MediLink_Deductible.py +7 -8
MediLink/MediLink_Down.py +115 -120
MediLink/MediLink_Gmail.py +7 -16
MediLink/MediLink_Parser.py +63 -36
MediLink/MediLink_UI.py +29 -24
MediLink/MediLink_Up.py +12 -8
{medicafe-0.240809.0.dist-info → medicafe-0.241015.0.dist-info}/METADATA +1 -1
medicafe-0.241015.0.dist-info/RECORD +47 -0
{medicafe-0.240809.0.dist-info → medicafe-0.241015.0.dist-info}/WHEEL +1 -1
medicafe-0.240809.0.dist-info/RECORD +0 -47
{medicafe-0.240809.0.dist-info → medicafe-0.241015.0.dist-info}/LICENSE +0 -0
{medicafe-0.240809.0.dist-info → medicafe-0.241015.0.dist-info}/top_level.txt +0 -0

MediBot/MediBot_UI.py CHANGED Viewed

@@ -1,22 +1,18 @@
-from sys import exit
-import ctypes
+#MediBot_UI.py
+import ctypes, time, re, os, sys
 from ctypes import wintypes
-import time
-import re
-# Add parent directory of the project to the Python path
-import os
-import sys
+from sys import exit
 project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-sys.path.append(project_dir)
+if project_dir not in sys.path:
+    sys.path.append(project_dir)
 try:
     from MediLink import MediLink_ConfigLoader
-    config, crosswalk = MediLink_ConfigLoader.load_configuration()
 except ImportError:
-    from MediLink_ConfigLoader import load_configuration
-    config, crosswalk = load_configuration()
+    import MediLink_ConfigLoader
+# Load configuration
+config, crosswalk = MediLink_ConfigLoader.load_configuration()
 # Function to check if a specific key is pressed
 VK_END = int(config.get('VK_END', ""), 16) # Try F12 (7B). Virtual key code for 'End' (23)
@@ -50,6 +46,7 @@ class AppControl:
     def load_paths_from_config(self, medicare=False):
         # Assuming `config` is a module or a globally accessible configuration dictionary
+        # TODO Is this where the MAINS paths should also be set?
         if medicare:
             self.mapat_med_path = config.get('MEDICARE_MAPAT_MED_PATH', "")
             self.medisoft_shortcut = config.get('MEDICARE_SHORTCUT', "")
@@ -211,18 +208,22 @@ def user_interaction(csv_data, interaction_mode, error_message, reverse_mapping)
         display_menu_header("            =(^.^)= Welcome to MediBot! =(^.^)=")
         while True:
-            response = input("\nAm I processing Medicare patients? (yes/no): ").lower().strip()
-            if response:
-                if response in ['yes', 'y']:
-                    app_control.load_paths_from_config(medicare=True)
-                    break
-                elif response in ['no', 'n']:
-                    app_control.load_paths_from_config(medicare=False)
-                    break
+            try:
+                response = input("\nAm I processing Medicare patients? (yes/no): ").lower().strip()
+                if response:
+                    if response in ['yes', 'y']:
+                        app_control.load_paths_from_config(medicare=True)
+                        break
+                    elif response in ['no', 'n']:
+                        app_control.load_paths_from_config(medicare=False)
+                        break
+                    else:
+                        print("Invalid entry. Please enter 'yes' or 'no'.")
                 else:
-                    print("Invalid entry. Please enter 'yes' or 'no'.")
-            else:
-                print("A response is required. Please try again.")
+                    print("A response is required. Please try again.")
+            except KeyboardInterrupt:
+                print("\nOperation cancelled by user. Exiting script.")
+                exit()
         fixed_values = config.get('fixed_values', {})  # Get fixed values from config json
         if response in ['yes', 'y']:

MediBot/MediBot_dataformat_library.py CHANGED Viewed

@@ -1,3 +1,4 @@
+#MediBot_dataformat_library.py
 import re
 from datetime import datetime
 import re  #for addresses
@@ -159,31 +160,22 @@ def format_zip(value):
     return value_str[:5]
 def format_data(medisoft_field, value, csv_data, reverse_mapping, parsed_address_components):
-    if medisoft_field == 'Patient Name':
-        formatted_value = format_name(value)
-    elif medisoft_field == 'Birth Date':
-        formatted_value = format_date(value)
-    elif medisoft_field == 'Phone':
-        formatted_value = format_phone(value)
-    elif medisoft_field == 'Phone #2':
-        formatted_value = format_phone(value)
-    elif medisoft_field == 'Gender':
-        formatted_value = format_gender(value)
-    elif medisoft_field == 'Street':
-        formatted_value = format_street(value, csv_data, reverse_mapping, parsed_address_components)
-    elif medisoft_field == 'Zip Code':
-        formatted_value = format_zip(value)
-    elif medisoft_field == 'Primary Policy Number':
-        formatted_value = format_policy(value)
-    elif medisoft_field == 'Secondary Policy Number':
-        formatted_value = format_policy(value)
-    elif medisoft_field == 'Primary Group Number':
-        formatted_value = format_policy(value)
-    elif medisoft_field == 'Secondary Group Number':
-        formatted_value = format_policy(value)
-    else:
-        formatted_value = str(value) # Ensure value is always a string
+    formatters = {
+        'Patient Name': format_name,
+        'Birth Date': format_date,
+        'Phone': format_phone,
+        'Phone #2': format_phone,
+        'Gender': format_gender,
+        'Street': lambda v: format_street(v, csv_data, reverse_mapping, parsed_address_components),
+        'Zip Code': format_zip,
+        'Primary Policy Number': format_policy,
+        'Secondary Policy Number': format_policy,
+        'Primary Group Number': format_policy,
+        'Secondary Group Number': format_policy
+    }
+    formatted_value = formatters.get(medisoft_field, str)(value)  # Default to str if not found
     formatted_value = formatted_value.replace(',', '{,}').replace(' ', '{Space}')
     ahk_command = 'SendInput, {}{{Enter}}'.format(formatted_value)
     return ahk_command

MediBot/MediBot_docx_decoder.py CHANGED Viewed

@@ -1,11 +1,9 @@
-from docx import Document
-import re
-from lxml import etree
-import zipfile
+#MediBot_docx_decoder.py
 from datetime import datetime
-import os
-import sys
 from collections import OrderedDict
+import os, re, sys, zipfile
+from docx import Document
+from lxml import etree
 # Add parent directory of the project to the Python path
 project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -16,11 +14,12 @@ try:
 except ImportError:
     from MediLink import MediLink_ConfigLoader
-def parse_docx(filepath):
+def parse_docx(filepath, surgery_dates):  # Accept surgery_dates as a parameter
     try:
         doc = Document(filepath)  # Open the .docx file
     except Exception as e:
-        MediLink_ConfigLoader.log("Error opening document: {}".format(e))  # Log error
+        MediLink_ConfigLoader.log("Error opening document: {}".format(e), level="ERROR")  # Log error
         return {}
     patient_data = OrderedDict()  # Initialize OrderedDict to store data
@@ -29,6 +28,18 @@ def parse_docx(filepath):
     date_of_service = extract_date_of_service(filepath)  # Extract date of service
     MediLink_ConfigLoader.log("Date of Service recorded as: {}".format(date_of_service), level="DEBUG")
+    # Convert date_of_service to match the format of surgery_dates
+    date_of_service = datetime.strptime(date_of_service, '%m-%d-%Y')  # Convert to datetime object
+    # Check if the date_of_service is in the passed surgery_dates
+    if date_of_service not in surgery_dates:  # Direct comparison with datetime objects
+        MediLink_ConfigLoader.log("Date of Service {} not found in provided surgery dates. Skipping document.".format(date_of_service), level="DEBUG")
+        return {}  # Early exit if date is not found
+    MediLink_ConfigLoader.log("Date of Service {} found in surgery dates. Proceeding with parsing of the document.".format(date_of_service), level="DEBUG")  # Log that date of service was found
+    # Convert back to MM-DD-YYYY format.
+    # TODO in the future, maybe just do the treatment to surgery_dates, no need to convert back and forth..
+    date_of_service = date_of_service.strftime('%m-%d-%Y')
     for table in doc.tables:  # Iterate over tables in the document
         for row in table.rows:
             cells = [cell.text.strip() for cell in row.cells]
@@ -43,11 +54,11 @@ def parse_docx(filepath):
                         patient_data[patient_id] = {}
                     if date_of_service in patient_data[patient_id]:
-                        MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service))
+                        MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service), level="WARNING")
                     else:
                         patient_data[patient_id][date_of_service] = [diagnosis_code, left_or_right_eye, femto_yes_or_no]
                 except Exception as e:
-                    MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e))
+                    MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e), level="ERROR")
     # Validation steps
     validate_unknown_entries(patient_data)
@@ -55,17 +66,18 @@ def parse_docx(filepath):
     return patient_data
 def validate_unknown_entries(patient_data):
     for patient_id, dates in list(patient_data.items()):
         for date, details in list(dates.items()):
             if 'Unknown' in details:
                 warning_message = "Warning: 'Unknown' entry found. Patient ID: {}, Date: {}, Details: {}".format(patient_id, date, details)
                 MediLink_ConfigLoader.log(warning_message, level="WARNING")
-                print(warning_message)
                 del patient_data[patient_id][date]
         if not patient_data[patient_id]:  # If no dates left for the patient, remove the patient
             del patient_data[patient_id]
 def validate_diagnostic_code(patient_data):
     for patient_id, dates in patient_data.items():
         for date, details in dates.items():
@@ -76,125 +88,133 @@ def validate_diagnostic_code(patient_data):
                 elif eye == 'Right' and not diagnostic_code.endswith('1'):
                     log_and_warn(patient_id, date, diagnostic_code, eye)
 def log_and_warn(patient_id, date, diagnostic_code, eye):
     warning_message = (
         "Warning: Mismatch found for Patient ID: {}, Date: {}, "
         "Diagnostic Code: {}, Eye: {}".format(patient_id, date, diagnostic_code, eye)
     )
     MediLink_ConfigLoader.log(warning_message, level="WARNING")
-    print(warning_message)
-# Extract and parse the date of service from the .docx file
-def extract_date_of_service(docx_path):
-    extract_to = "extracted_docx"
+def extract_date_of_service(docx_path, use_in_memory=True):
+    extract_to = "extracted_docx_debug"
+    in_memory_result = None
+    directory_based_result = None
+    # Log the selected approach
+    if use_in_memory:
+        MediLink_ConfigLoader.log("Using In-Memory extraction approach for Surgery Schedule.", level="INFO")
+    else:
+        MediLink_ConfigLoader.log("Using Directory-Based extraction approach for Surgery Schedule.", level="INFO")
+    # Directory-Based Extraction
+    if not use_in_memory:  # Only perform directory-based extraction if in-memory is not selected
+        try:
+            if not os.path.exists(extract_to):
+                os.makedirs(extract_to)
+                MediLink_ConfigLoader.log("Created extraction directory: {}".format(extract_to), level="DEBUG")
+            with zipfile.ZipFile(docx_path, 'r') as docx:
+                MediLink_ConfigLoader.log("Opened DOCX file: {}".format(docx_path), level="DEBUG")
+                docx.extractall(extract_to)
+                MediLink_ConfigLoader.log("Extracted DOCX to: {}".format(extract_to), level="DEBUG")
+            file_path = find_text_in_xml(extract_to, "Surgery Schedule")
+            if file_path:
+                MediLink_ConfigLoader.log("Found XML file with target text: {}".format(file_path), level="DEBUG")
+                directory_based_result = extract_date_from_file(file_path)
+                MediLink_ConfigLoader.log("Directory-Based Extraction Result: {}".format(directory_based_result), level="DEBUG")
+            else:
+                MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
+        except zipfile.BadZipFile as e:
+            MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
+        except Exception as e:
+            MediLink_ConfigLoader.log("Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
+    # In-Memory Extraction  // Single-Pass Processing is typically more efficient in terms of both time and memory compared to list creation for header isolation.
+    if use_in_memory:  # Only perform in-memory extraction if selected
+        try:
+            with zipfile.ZipFile(docx_path, 'r') as docx:
+                MediLink_ConfigLoader.log("Opened DOCX file for In-Memory extraction: {}".format(docx_path), level="DEBUG")
+                for file_info in docx.infolist():
+                    if file_info.filename.endswith('.xml'):
+                        MediLink_ConfigLoader.log("Processing XML file in-memory: {}".format(file_info.filename), level="DEBUG")
+                        with docx.open(file_info) as file:
+                            try:
+                                xml_content = file.read()  # Read the entire XML content
+                                MediLink_ConfigLoader.log("Read XML content from {}".format(file_info.filename), level="DEBUG")
+                                if "Surgery Schedule" in xml_content.decode('utf-8', errors='ignore'):
+                                    MediLink_ConfigLoader.log("Found 'Surgery Schedule' in file: {}".format(file_info.filename), level="DEBUG")
+                                    in_memory_result = extract_date_from_content(xml_content)
+                                    MediLink_ConfigLoader.log("In-Memory Extraction Result from {}: {}".format(file_info.filename, in_memory_result), level="DEBUG")
+                                    break  # Stop after finding the first relevant file
+                            except Exception as e:
+                                MediLink_ConfigLoader.log("Error parsing XML file {} (In-Memory): {}".format(file_info.filename, e), level="ERROR")
+                if in_memory_result is None:
+                    MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files (In-Memory).", level="WARNING")
+        except zipfile.BadZipFile as e:
+            MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file for In-Memory extraction {}: {}".format(docx_path, e), level="ERROR")
+        except Exception as e:
+            MediLink_ConfigLoader.log("Error during In-Memory extraction of DOCX file {}: {}".format(docx_path, e), level="ERROR")
+    # Clean up the extracted directory if it exists
     try:
-        if not os.path.exists(extract_to):
-            os.makedirs(extract_to)
-        with zipfile.ZipFile(docx_path, 'r') as docx:
-            docx.extractall(extract_to)
-            MediLink_ConfigLoader.log("Extracted DOCX to: {}".format(extract_to), level="DEBUG")
-        file_path = find_text_in_xml(extract_to, "Surgery Schedule")
-        if file_path:
-            return extract_date_from_file(file_path)
-        else:
-            MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
-            return None
-    finally:
-        # Clean up the extracted files
-        remove_directory(extract_to)
-        MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
+        if os.path.exists(extract_to):
+            remove_directory(extract_to)
+            MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
+    except Exception as e:
+        MediLink_ConfigLoader.log("Error cleaning up extraction directory {}: {}".format(extract_to, e), level="ERROR")
-def remove_directory(path):
-    if os.path.exists(path):
-        for root, dirs, files in os.walk(path, topdown=False):
-            for name in files:
-                os.remove(os.path.join(root, name))
-            for name in dirs:
-                os.rmdir(os.path.join(root, name))
-        os.rmdir(path)
+    # Decide which result to return (prefer in-memory if available)
+    if in_memory_result:
+        return in_memory_result
+    elif directory_based_result:
+        return directory_based_result
+    else:
+        return None
-# Find the target text in the extracted XML files
-def find_text_in_xml(directory, target_text):
-    for root_dir, dirs, files in os.walk(directory):
+def find_text_in_xml(extract_dir, target_text):
+    target_pattern = re.compile(re.escape(target_text), re.IGNORECASE)
+    for root_dir, dirs, files in os.walk(extract_dir):
         for file in files:
-            if file.endswith('.xml'):
+            if file.endswith('.xml') and file != '[Content_Types].xml':  # Skip Content_Types.xml
                 file_path = os.path.join(root_dir, file)
                 try:
                     tree = etree.parse(file_path)
                     root = tree.getroot()
-                    namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # hardcoded for XP handling BUG
+                    namespaces = root.nsmap
+                    MediLink_ConfigLoader.log("Processing file: {}".format(file_path), level="DEBUG")
                     for elem in root.xpath('//w:t', namespaces=namespaces):
-                        if elem.text and target_text in elem.text:
-                            MediLink_ConfigLoader.log("Found target text in file: {}".format(file_path), level="DEBUG")
+                        if elem.text and target_pattern.search(elem.text):
+                            MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
                             return file_path
+                except etree.XMLSyntaxError as e:
+                    MediLink_ConfigLoader.log("XMLSyntaxError parsing file {}: {}".format(file_path, e), level="ERROR")
                 except Exception as e:
-                    MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e))
-                    print("Error parsing XML file {}: {}".format(file_path, e))
+                    MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e), level="ERROR")
+    MediLink_ConfigLoader.log("Target text '{}' not found in any XML files within directory: {}".format(target_text, extract_dir), level="WARNING")
     return None
-# Normalize month and day abbreviations
-def normalize_text(text):
-    month_map = {
-        'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
-        'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
-        'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
-    }
-    day_map = {
-        'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
-        'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
-    }
-    for abbr, full in month_map.items():
-        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
-    for abbr, full in day_map.items():
-        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
-    return text
-def reassemble_year(text):
-    # First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
-    text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
-    text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
-    text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
-    # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
-    parts = re.findall(r'\b(\d{1,2})\b', text)
-    if len(parts) >= 4:
-        for i in range(len(parts) - 3):
-            candidate = ''.join(parts[i:i + 4])
-            if len(candidate) == 4 and candidate.isdigit():
-                combined_year = candidate
-                text = re.sub(r'\b' + r'\b \b'.join(parts[i:i + 4]) + r'\b', combined_year, text)
-                break
-    return text
-# Extract and parse the date from the file
 def extract_date_from_file(file_path):
     try:
         tree = etree.parse(file_path)
         root = tree.getroot()
         collected_text = []
-        namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # hardcoded for XP handling BUG
+        namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}  # Hardcoded for XP handling BUG
         for elem in root.xpath('//w:t', namespaces=namespaces):
             if elem.text:
                 collected_text.append(elem.text.strip())
-        for elem in root.iter():
-            if elem.tag.endswith('t') and elem.text:
-                collected_text.append(elem.text.strip())
         combined_text = ' '.join(collected_text)
-        combined_text = reassemble_year(combined_text) # Fix OCR splitting years
-        # combined_text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', combined_text) # initial year regex.
+        combined_text = reassemble_year(combined_text)  # Fix OCR splitting years
         combined_text = normalize_text(combined_text)  # Normalize abbreviations
         combined_text = re.sub(r',', '', combined_text)  # Remove commas if they exist
         # Log the combined text
-        MediLink_ConfigLoader.log("Combined text: {}".format(combined_text), level="DEBUG")
-        # print("DEBUG: Combined text: {}".format(combined_text))
+        MediLink_ConfigLoader.log("Combined text from file '{}': {}".format(file_path, combined_text[:200]), level="DEBUG")
         day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
         month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
@@ -203,39 +223,171 @@ def extract_date_from_file(file_path):
         day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
         month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
         year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
         # Log the results of the regex searches
         MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
         MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
         MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
+        if day_of_week and month_day and year_match:
+            date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
+            try:
+                date_obj = datetime.strptime(date_str, '%A %B %d %Y')
+                extracted_date = date_obj.strftime('%m-%d-%Y')
+                MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
+                return extracted_date
+            except ValueError as e:
+                MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
+        else:
+            MediLink_ConfigLoader.log(
+                "Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
+                    combined_text,
+                    day_of_week.group() if day_of_week else 'None',
+                    month_day.group() if month_day else 'None',
+                    year_match.group() if year_match else 'None'
+                ), level="WARNING"
+            )
+    except etree.XMLSyntaxError as e:
+        MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_file '{}': {}".format(file_path, e), level="ERROR")
+    except Exception as e:
+        MediLink_ConfigLoader.log("Error extracting date from file '{}': {}".format(file_path, e), level="ERROR")
+    return None
+def extract_date_from_content(xml_content):
+    try:
+        # Parse the XML content into an ElementTree
+        tree = etree.fromstring(xml_content)
+        root = tree  # root is already the root element in this case
+        collected_text = []
+        namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
+        MediLink_ConfigLoader.log("Using namespaces: {}".format(namespaces), level="DEBUG")
+        # Extract text from all <w:t> elements
+        for elem in root.xpath('//w:t', namespaces=namespaces):
+            if elem.text:
+                collected_text.append(elem.text.strip())
+        # Log the collected text snippets
+        MediLink_ConfigLoader.log("Collected text snippets: {}".format(collected_text), level="DEBUG")
+        combined_text = ' '.join(collected_text)
+        combined_text = reassemble_year(combined_text)  # Fix OCR splitting years
+        combined_text = normalize_text(combined_text)    # Normalize abbreviations
+        combined_text = re.sub(r',', '', combined_text)   # Remove commas if they exist
+        # Log the combined text
+        MediLink_ConfigLoader.log("Combined text: {}".format(combined_text[:200]), level="DEBUG")  # Log first 200 characters
+        day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
+        month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
+        year_pattern = r"\d{4}"
+        day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
+        month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
+        year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
+        MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
+        MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
+        MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
         if day_of_week and month_day and year_match:
             date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
             try:
                 date_obj = datetime.strptime(date_str, '%A %B %d %Y')
-                return date_obj.strftime('%m-%d-%Y')
+                extracted_date = date_obj.strftime('%m-%d-%Y')
+                MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
+                return extracted_date
             except ValueError as e:
                 MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
         else:
-            MediLink_ConfigLoader.log("Date components not found or incomplete in the text. Combined text: {}, Day of week: {}, Month and day: {}, Year: {}"
-                .format(combined_text,
-                        day_of_week.group() if day_of_week else 'None',
-                        month_day.group() if month_day else 'None',
-                        year_match.group() if year_match else 'None'),
-                level="WARNING")
+            MediLink_ConfigLoader.log(
+                "Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
+                    combined_text,
+                    day_of_week.group() if day_of_week else 'None',
+                    month_day.group() if month_day else 'None',
+                    year_match.group() if year_match else 'None'
+                ), level="WARNING"
+            )
+    except etree.XMLSyntaxError as e:
+        MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_content: {}".format(e), level="ERROR")
     except Exception as e:
-        MediLink_ConfigLoader.log("Error extracting date from file: {}. Error: {}".format(file_path, e))
-        print("Error extracting date from file: {}. Error: {}".format(file_path, e))
+        MediLink_ConfigLoader.log("Error extracting date from content: {}".format(e), level="ERROR")
     return None
+def remove_directory(path):
+    if os.path.exists(path):
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                try:
+                    os.remove(os.path.join(root, name))
+                    MediLink_ConfigLoader.log("Removed file: {}".format(os.path.join(root, name)), level="DEBUG")
+                except Exception as e:
+                    MediLink_ConfigLoader.log("Error removing file {}: {}".format(os.path.join(root, name), e), level="ERROR")
+            for name in dirs:
+                try:
+                    os.rmdir(os.path.join(root, name))
+                    MediLink_ConfigLoader.log("Removed directory: {}".format(os.path.join(root, name)), level="DEBUG")
+                except Exception as e:
+                    MediLink_ConfigLoader.log("Error removing directory {}: {}".format(os.path.join(root, name), e), level="ERROR")
+        try:
+            os.rmdir(path)
+            MediLink_ConfigLoader.log("Removed extraction root directory: {}".format(path), level="DEBUG")
+        except Exception as e:
+            MediLink_ConfigLoader.log("Error removing root directory {}: {}".format(path, e), level="ERROR")
+def normalize_text(text):
+    month_map = {
+        'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
+        'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
+        'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
+    }
+    day_map = {
+        'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
+        'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
+    }
+    for abbr, full in month_map.items():
+        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
+    for abbr, full in day_map.items():
+        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
+    return text
+def reassemble_year(text):
+    # First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
+    text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
+    text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
+    text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
+    # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
+    parts = re.findall(r'\b(\d{1,2})\b', text)
+    if len(parts) >= 4:
+        for i in range(len(parts) - 3):
+            candidate = ''.join(parts[i:i + 4])
+            if len(candidate) == 4 and candidate.isdigit():
+                combined_year = candidate
+                pattern = r'\b' + r'\s+'.join(parts[i:i + 4]) + r'\b'
+                text = re.sub(pattern, combined_year, text)
+                break
+    return text
 def parse_patient_id(text):
     try:
         return text.split()[0].lstrip('#')  # Extract patient ID number (removing the '#')
     except Exception as e:
-        MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e))
+        MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e), level="ERROR")
         return None
 def parse_diagnosis_code(text):
     try:
         # Regular expression to find all ICD-10 codes starting with 'H' and containing a period
@@ -252,9 +404,10 @@ def parse_diagnosis_code(text):
             return text.split('/')[0]
     except Exception as e:
-        MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e))
+        MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e), level="ERROR")
         return "Unknown"
 def parse_left_or_right_eye(text):
     try:
         if 'LEFT EYE' in text.upper():
@@ -264,9 +417,10 @@ def parse_left_or_right_eye(text):
         else:
             return 'Unknown'
     except Exception as e:
-        MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e))
+        MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e), level="ERROR")
         return 'Unknown'
 def parse_femto_yes_or_no(text):
     try:
         if 'FEMTO' in text.upper():
@@ -274,9 +428,10 @@ def parse_femto_yes_or_no(text):
         else:
             return False
     except Exception as e:
-        MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e))
+        MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e), level="ERROR")
         return False
 def rotate_docx_files(directory):
     # List all files in the directory
     files = os.listdir(directory)
@@ -294,10 +449,12 @@ def rotate_docx_files(directory):
         pprint.pprint(patient_data_dict)
         print()
 def main():
     # Call the function with the directory containing your .docx files
     directory = "C:\\Users\\danie\\Downloads\\"
     rotate_docx_files(directory)
 if __name__ == "__main__":
     main()

medicafe 0.240809.0__py3-none-any.whl → 0.241015.0__py3-none-any.whl

Potentially problematic release.

medicafe 0.240809.0__py3-none-any.whl → 0.241015.0__py3-none-any.whl