medicafe 0.240809.0__py3-none-any.whl → 0.241015.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of medicafe might be problematic. Click here for more details.

MediBot/MediBot_UI.py CHANGED
@@ -1,22 +1,18 @@
1
- from sys import exit
2
- import ctypes
1
+ #MediBot_UI.py
2
+ import ctypes, time, re, os, sys
3
3
  from ctypes import wintypes
4
- import time
5
- import re
6
-
7
- # Add parent directory of the project to the Python path
8
- import os
9
- import sys
4
+ from sys import exit
10
5
  project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
11
- sys.path.append(project_dir)
6
+ if project_dir not in sys.path:
7
+ sys.path.append(project_dir)
12
8
 
13
9
  try:
14
10
  from MediLink import MediLink_ConfigLoader
15
- config, crosswalk = MediLink_ConfigLoader.load_configuration()
16
11
  except ImportError:
17
- from MediLink_ConfigLoader import load_configuration
18
- config, crosswalk = load_configuration()
19
-
12
+ import MediLink_ConfigLoader
13
+
14
+ # Load configuration
15
+ config, crosswalk = MediLink_ConfigLoader.load_configuration()
20
16
 
21
17
  # Function to check if a specific key is pressed
22
18
  VK_END = int(config.get('VK_END', ""), 16) # Try F12 (7B). Virtual key code for 'End' (23)
@@ -50,6 +46,7 @@ class AppControl:
50
46
 
51
47
  def load_paths_from_config(self, medicare=False):
52
48
  # Assuming `config` is a module or a globally accessible configuration dictionary
49
+ # TODO Is this where the MAINS paths should also be set?
53
50
  if medicare:
54
51
  self.mapat_med_path = config.get('MEDICARE_MAPAT_MED_PATH', "")
55
52
  self.medisoft_shortcut = config.get('MEDICARE_SHORTCUT', "")
@@ -211,18 +208,22 @@ def user_interaction(csv_data, interaction_mode, error_message, reverse_mapping)
211
208
  display_menu_header(" =(^.^)= Welcome to MediBot! =(^.^)=")
212
209
 
213
210
  while True:
214
- response = input("\nAm I processing Medicare patients? (yes/no): ").lower().strip()
215
- if response:
216
- if response in ['yes', 'y']:
217
- app_control.load_paths_from_config(medicare=True)
218
- break
219
- elif response in ['no', 'n']:
220
- app_control.load_paths_from_config(medicare=False)
221
- break
211
+ try:
212
+ response = input("\nAm I processing Medicare patients? (yes/no): ").lower().strip()
213
+ if response:
214
+ if response in ['yes', 'y']:
215
+ app_control.load_paths_from_config(medicare=True)
216
+ break
217
+ elif response in ['no', 'n']:
218
+ app_control.load_paths_from_config(medicare=False)
219
+ break
220
+ else:
221
+ print("Invalid entry. Please enter 'yes' or 'no'.")
222
222
  else:
223
- print("Invalid entry. Please enter 'yes' or 'no'.")
224
- else:
225
- print("A response is required. Please try again.")
223
+ print("A response is required. Please try again.")
224
+ except KeyboardInterrupt:
225
+ print("\nOperation cancelled by user. Exiting script.")
226
+ exit()
226
227
 
227
228
  fixed_values = config.get('fixed_values', {}) # Get fixed values from config json
228
229
  if response in ['yes', 'y']:
@@ -1,3 +1,4 @@
1
+ #MediBot_dataformat_library.py
1
2
  import re
2
3
  from datetime import datetime
3
4
  import re #for addresses
@@ -159,31 +160,22 @@ def format_zip(value):
159
160
  return value_str[:5]
160
161
 
161
162
  def format_data(medisoft_field, value, csv_data, reverse_mapping, parsed_address_components):
162
- if medisoft_field == 'Patient Name':
163
- formatted_value = format_name(value)
164
- elif medisoft_field == 'Birth Date':
165
- formatted_value = format_date(value)
166
- elif medisoft_field == 'Phone':
167
- formatted_value = format_phone(value)
168
- elif medisoft_field == 'Phone #2':
169
- formatted_value = format_phone(value)
170
- elif medisoft_field == 'Gender':
171
- formatted_value = format_gender(value)
172
- elif medisoft_field == 'Street':
173
- formatted_value = format_street(value, csv_data, reverse_mapping, parsed_address_components)
174
- elif medisoft_field == 'Zip Code':
175
- formatted_value = format_zip(value)
176
- elif medisoft_field == 'Primary Policy Number':
177
- formatted_value = format_policy(value)
178
- elif medisoft_field == 'Secondary Policy Number':
179
- formatted_value = format_policy(value)
180
- elif medisoft_field == 'Primary Group Number':
181
- formatted_value = format_policy(value)
182
- elif medisoft_field == 'Secondary Group Number':
183
- formatted_value = format_policy(value)
184
- else:
185
- formatted_value = str(value) # Ensure value is always a string
186
-
163
+ formatters = {
164
+ 'Patient Name': format_name,
165
+ 'Birth Date': format_date,
166
+ 'Phone': format_phone,
167
+ 'Phone #2': format_phone,
168
+ 'Gender': format_gender,
169
+ 'Street': lambda v: format_street(v, csv_data, reverse_mapping, parsed_address_components),
170
+ 'Zip Code': format_zip,
171
+ 'Primary Policy Number': format_policy,
172
+ 'Secondary Policy Number': format_policy,
173
+ 'Primary Group Number': format_policy,
174
+ 'Secondary Group Number': format_policy
175
+ }
176
+
177
+ formatted_value = formatters.get(medisoft_field, str)(value) # Default to str if not found
187
178
  formatted_value = formatted_value.replace(',', '{,}').replace(' ', '{Space}')
179
+
188
180
  ahk_command = 'SendInput, {}{{Enter}}'.format(formatted_value)
189
181
  return ahk_command
@@ -1,11 +1,9 @@
1
- from docx import Document
2
- import re
3
- from lxml import etree
4
- import zipfile
1
+ #MediBot_docx_decoder.py
5
2
  from datetime import datetime
6
- import os
7
- import sys
8
3
  from collections import OrderedDict
4
+ import os, re, sys, zipfile
5
+ from docx import Document
6
+ from lxml import etree
9
7
 
10
8
  # Add parent directory of the project to the Python path
11
9
  project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -16,11 +14,12 @@ try:
16
14
  except ImportError:
17
15
  from MediLink import MediLink_ConfigLoader
18
16
 
19
- def parse_docx(filepath):
17
+
18
+ def parse_docx(filepath, surgery_dates): # Accept surgery_dates as a parameter
20
19
  try:
21
20
  doc = Document(filepath) # Open the .docx file
22
21
  except Exception as e:
23
- MediLink_ConfigLoader.log("Error opening document: {}".format(e)) # Log error
22
+ MediLink_ConfigLoader.log("Error opening document: {}".format(e), level="ERROR") # Log error
24
23
  return {}
25
24
 
26
25
  patient_data = OrderedDict() # Initialize OrderedDict to store data
@@ -29,6 +28,18 @@ def parse_docx(filepath):
29
28
  date_of_service = extract_date_of_service(filepath) # Extract date of service
30
29
  MediLink_ConfigLoader.log("Date of Service recorded as: {}".format(date_of_service), level="DEBUG")
31
30
 
31
+ # Convert date_of_service to match the format of surgery_dates
32
+ date_of_service = datetime.strptime(date_of_service, '%m-%d-%Y') # Convert to datetime object
33
+ # Check if the date_of_service is in the passed surgery_dates
34
+ if date_of_service not in surgery_dates: # Direct comparison with datetime objects
35
+ MediLink_ConfigLoader.log("Date of Service {} not found in provided surgery dates. Skipping document.".format(date_of_service), level="DEBUG")
36
+ return {} # Early exit if date is not found
37
+
38
+ MediLink_ConfigLoader.log("Date of Service {} found in surgery dates. Proceeding with parsing of the document.".format(date_of_service), level="DEBUG") # Log that date of service was found
39
+ # Convert back to MM-DD-YYYY format.
40
+ # TODO in the future, maybe just do the treatment to surgery_dates, no need to convert back and forth..
41
+ date_of_service = date_of_service.strftime('%m-%d-%Y')
42
+
32
43
  for table in doc.tables: # Iterate over tables in the document
33
44
  for row in table.rows:
34
45
  cells = [cell.text.strip() for cell in row.cells]
@@ -43,11 +54,11 @@ def parse_docx(filepath):
43
54
  patient_data[patient_id] = {}
44
55
 
45
56
  if date_of_service in patient_data[patient_id]:
46
- MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service))
57
+ MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service), level="WARNING")
47
58
  else:
48
59
  patient_data[patient_id][date_of_service] = [diagnosis_code, left_or_right_eye, femto_yes_or_no]
49
60
  except Exception as e:
50
- MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e))
61
+ MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e), level="ERROR")
51
62
 
52
63
  # Validation steps
53
64
  validate_unknown_entries(patient_data)
@@ -55,17 +66,18 @@ def parse_docx(filepath):
55
66
 
56
67
  return patient_data
57
68
 
69
+
58
70
  def validate_unknown_entries(patient_data):
59
71
  for patient_id, dates in list(patient_data.items()):
60
72
  for date, details in list(dates.items()):
61
73
  if 'Unknown' in details:
62
74
  warning_message = "Warning: 'Unknown' entry found. Patient ID: {}, Date: {}, Details: {}".format(patient_id, date, details)
63
75
  MediLink_ConfigLoader.log(warning_message, level="WARNING")
64
- print(warning_message)
65
76
  del patient_data[patient_id][date]
66
77
  if not patient_data[patient_id]: # If no dates left for the patient, remove the patient
67
78
  del patient_data[patient_id]
68
79
 
80
+
69
81
  def validate_diagnostic_code(patient_data):
70
82
  for patient_id, dates in patient_data.items():
71
83
  for date, details in dates.items():
@@ -76,125 +88,133 @@ def validate_diagnostic_code(patient_data):
76
88
  elif eye == 'Right' and not diagnostic_code.endswith('1'):
77
89
  log_and_warn(patient_id, date, diagnostic_code, eye)
78
90
 
91
+
79
92
  def log_and_warn(patient_id, date, diagnostic_code, eye):
80
93
  warning_message = (
81
94
  "Warning: Mismatch found for Patient ID: {}, Date: {}, "
82
95
  "Diagnostic Code: {}, Eye: {}".format(patient_id, date, diagnostic_code, eye)
83
96
  )
84
97
  MediLink_ConfigLoader.log(warning_message, level="WARNING")
85
- print(warning_message)
86
98
 
87
- # Extract and parse the date of service from the .docx file
88
- def extract_date_of_service(docx_path):
89
- extract_to = "extracted_docx"
99
+
100
+ def extract_date_of_service(docx_path, use_in_memory=True):
101
+ extract_to = "extracted_docx_debug"
102
+ in_memory_result = None
103
+ directory_based_result = None
104
+
105
+ # Log the selected approach
106
+ if use_in_memory:
107
+ MediLink_ConfigLoader.log("Using In-Memory extraction approach for Surgery Schedule.", level="INFO")
108
+ else:
109
+ MediLink_ConfigLoader.log("Using Directory-Based extraction approach for Surgery Schedule.", level="INFO")
110
+
111
+ # Directory-Based Extraction
112
+ if not use_in_memory: # Only perform directory-based extraction if in-memory is not selected
113
+ try:
114
+ if not os.path.exists(extract_to):
115
+ os.makedirs(extract_to)
116
+ MediLink_ConfigLoader.log("Created extraction directory: {}".format(extract_to), level="DEBUG")
117
+
118
+ with zipfile.ZipFile(docx_path, 'r') as docx:
119
+ MediLink_ConfigLoader.log("Opened DOCX file: {}".format(docx_path), level="DEBUG")
120
+ docx.extractall(extract_to)
121
+ MediLink_ConfigLoader.log("Extracted DOCX to: {}".format(extract_to), level="DEBUG")
122
+
123
+ file_path = find_text_in_xml(extract_to, "Surgery Schedule")
124
+ if file_path:
125
+ MediLink_ConfigLoader.log("Found XML file with target text: {}".format(file_path), level="DEBUG")
126
+ directory_based_result = extract_date_from_file(file_path)
127
+ MediLink_ConfigLoader.log("Directory-Based Extraction Result: {}".format(directory_based_result), level="DEBUG")
128
+ else:
129
+ MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
130
+ except zipfile.BadZipFile as e:
131
+ MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
132
+ except Exception as e:
133
+ MediLink_ConfigLoader.log("Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
134
+
135
+ # In-Memory Extraction // Single-Pass Processing is typically more efficient in terms of both time and memory compared to list creation for header isolation.
136
+ if use_in_memory: # Only perform in-memory extraction if selected
137
+ try:
138
+ with zipfile.ZipFile(docx_path, 'r') as docx:
139
+ MediLink_ConfigLoader.log("Opened DOCX file for In-Memory extraction: {}".format(docx_path), level="DEBUG")
140
+ for file_info in docx.infolist():
141
+ if file_info.filename.endswith('.xml'):
142
+ MediLink_ConfigLoader.log("Processing XML file in-memory: {}".format(file_info.filename), level="DEBUG")
143
+ with docx.open(file_info) as file:
144
+ try:
145
+ xml_content = file.read() # Read the entire XML content
146
+ MediLink_ConfigLoader.log("Read XML content from {}".format(file_info.filename), level="DEBUG")
147
+ if "Surgery Schedule" in xml_content.decode('utf-8', errors='ignore'):
148
+ MediLink_ConfigLoader.log("Found 'Surgery Schedule' in file: {}".format(file_info.filename), level="DEBUG")
149
+ in_memory_result = extract_date_from_content(xml_content)
150
+ MediLink_ConfigLoader.log("In-Memory Extraction Result from {}: {}".format(file_info.filename, in_memory_result), level="DEBUG")
151
+ break # Stop after finding the first relevant file
152
+ except Exception as e:
153
+ MediLink_ConfigLoader.log("Error parsing XML file {} (In-Memory): {}".format(file_info.filename, e), level="ERROR")
154
+
155
+ if in_memory_result is None:
156
+ MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files (In-Memory).", level="WARNING")
157
+ except zipfile.BadZipFile as e:
158
+ MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file for In-Memory extraction {}: {}".format(docx_path, e), level="ERROR")
159
+ except Exception as e:
160
+ MediLink_ConfigLoader.log("Error during In-Memory extraction of DOCX file {}: {}".format(docx_path, e), level="ERROR")
161
+
162
+ # Clean up the extracted directory if it exists
90
163
  try:
91
- if not os.path.exists(extract_to):
92
- os.makedirs(extract_to)
93
- with zipfile.ZipFile(docx_path, 'r') as docx:
94
- docx.extractall(extract_to)
95
- MediLink_ConfigLoader.log("Extracted DOCX to: {}".format(extract_to), level="DEBUG")
96
-
97
- file_path = find_text_in_xml(extract_to, "Surgery Schedule")
98
- if file_path:
99
- return extract_date_from_file(file_path)
100
- else:
101
- MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
102
- return None
103
- finally:
104
- # Clean up the extracted files
105
- remove_directory(extract_to)
106
- MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
164
+ if os.path.exists(extract_to):
165
+ remove_directory(extract_to)
166
+ MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
167
+ except Exception as e:
168
+ MediLink_ConfigLoader.log("Error cleaning up extraction directory {}: {}".format(extract_to, e), level="ERROR")
107
169
 
108
- def remove_directory(path):
109
- if os.path.exists(path):
110
- for root, dirs, files in os.walk(path, topdown=False):
111
- for name in files:
112
- os.remove(os.path.join(root, name))
113
- for name in dirs:
114
- os.rmdir(os.path.join(root, name))
115
- os.rmdir(path)
170
+ # Decide which result to return (prefer in-memory if available)
171
+ if in_memory_result:
172
+ return in_memory_result
173
+ elif directory_based_result:
174
+ return directory_based_result
175
+ else:
176
+ return None
116
177
 
117
- # Find the target text in the extracted XML files
118
- def find_text_in_xml(directory, target_text):
119
- for root_dir, dirs, files in os.walk(directory):
178
+ def find_text_in_xml(extract_dir, target_text):
179
+ target_pattern = re.compile(re.escape(target_text), re.IGNORECASE)
180
+ for root_dir, dirs, files in os.walk(extract_dir):
120
181
  for file in files:
121
- if file.endswith('.xml'):
182
+ if file.endswith('.xml') and file != '[Content_Types].xml': # Skip Content_Types.xml
122
183
  file_path = os.path.join(root_dir, file)
123
184
  try:
124
185
  tree = etree.parse(file_path)
125
186
  root = tree.getroot()
126
- namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # hardcoded for XP handling BUG
187
+ namespaces = root.nsmap
188
+ MediLink_ConfigLoader.log("Processing file: {}".format(file_path), level="DEBUG")
127
189
  for elem in root.xpath('//w:t', namespaces=namespaces):
128
- if elem.text and target_text in elem.text:
129
- MediLink_ConfigLoader.log("Found target text in file: {}".format(file_path), level="DEBUG")
190
+ if elem.text and target_pattern.search(elem.text):
191
+ MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
130
192
  return file_path
193
+ except etree.XMLSyntaxError as e:
194
+ MediLink_ConfigLoader.log("XMLSyntaxError parsing file {}: {}".format(file_path, e), level="ERROR")
131
195
  except Exception as e:
132
- MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e))
133
- print("Error parsing XML file {}: {}".format(file_path, e))
196
+ MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e), level="ERROR")
197
+ MediLink_ConfigLoader.log("Target text '{}' not found in any XML files within directory: {}".format(target_text, extract_dir), level="WARNING")
134
198
  return None
135
199
 
136
- # Normalize month and day abbreviations
137
- def normalize_text(text):
138
- month_map = {
139
- 'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
140
- 'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
141
- 'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
142
- }
143
- day_map = {
144
- 'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
145
- 'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
146
- }
147
-
148
- for abbr, full in month_map.items():
149
- text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
150
- for abbr, full in day_map.items():
151
- text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
152
-
153
- return text
154
-
155
- def reassemble_year(text):
156
- # First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
157
- text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
158
- text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
159
- text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
160
-
161
- # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
162
- parts = re.findall(r'\b(\d{1,2})\b', text)
163
- if len(parts) >= 4:
164
- for i in range(len(parts) - 3):
165
- candidate = ''.join(parts[i:i + 4])
166
- if len(candidate) == 4 and candidate.isdigit():
167
- combined_year = candidate
168
- text = re.sub(r'\b' + r'\b \b'.join(parts[i:i + 4]) + r'\b', combined_year, text)
169
- break
170
-
171
- return text
172
-
173
- # Extract and parse the date from the file
174
200
  def extract_date_from_file(file_path):
175
201
  try:
176
202
  tree = etree.parse(file_path)
177
203
  root = tree.getroot()
178
204
  collected_text = []
179
205
 
180
- namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # hardcoded for XP handling BUG
206
+ namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # Hardcoded for XP handling BUG
181
207
  for elem in root.xpath('//w:t', namespaces=namespaces):
182
208
  if elem.text:
183
209
  collected_text.append(elem.text.strip())
184
210
 
185
- for elem in root.iter():
186
- if elem.tag.endswith('t') and elem.text:
187
- collected_text.append(elem.text.strip())
188
-
189
211
  combined_text = ' '.join(collected_text)
190
- combined_text = reassemble_year(combined_text) # Fix OCR splitting years
191
- # combined_text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', combined_text) # initial year regex.
212
+ combined_text = reassemble_year(combined_text) # Fix OCR splitting years
192
213
  combined_text = normalize_text(combined_text) # Normalize abbreviations
193
214
  combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
194
215
 
195
216
  # Log the combined text
196
- MediLink_ConfigLoader.log("Combined text: {}".format(combined_text), level="DEBUG")
197
- # print("DEBUG: Combined text: {}".format(combined_text))
217
+ MediLink_ConfigLoader.log("Combined text from file '{}': {}".format(file_path, combined_text[:200]), level="DEBUG")
198
218
 
199
219
  day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
200
220
  month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
@@ -203,39 +223,171 @@ def extract_date_from_file(file_path):
203
223
  day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
204
224
  month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
205
225
  year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
206
-
226
+
207
227
  # Log the results of the regex searches
208
228
  MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
209
229
  MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
210
230
  MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
231
+
232
+ if day_of_week and month_day and year_match:
233
+ date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
234
+ try:
235
+ date_obj = datetime.strptime(date_str, '%A %B %d %Y')
236
+ extracted_date = date_obj.strftime('%m-%d-%Y')
237
+ MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
238
+ return extracted_date
239
+ except ValueError as e:
240
+ MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
241
+ else:
242
+ MediLink_ConfigLoader.log(
243
+ "Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
244
+ combined_text,
245
+ day_of_week.group() if day_of_week else 'None',
246
+ month_day.group() if month_day else 'None',
247
+ year_match.group() if year_match else 'None'
248
+ ), level="WARNING"
249
+ )
250
+ except etree.XMLSyntaxError as e:
251
+ MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_file '{}': {}".format(file_path, e), level="ERROR")
252
+ except Exception as e:
253
+ MediLink_ConfigLoader.log("Error extracting date from file '{}': {}".format(file_path, e), level="ERROR")
254
+
255
+ return None
256
+
257
+
258
+ def extract_date_from_content(xml_content):
259
+ try:
260
+ # Parse the XML content into an ElementTree
261
+ tree = etree.fromstring(xml_content)
262
+ root = tree # root is already the root element in this case
263
+ collected_text = []
264
+
265
+ namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
266
+ MediLink_ConfigLoader.log("Using namespaces: {}".format(namespaces), level="DEBUG")
267
+
268
+ # Extract text from all <w:t> elements
269
+ for elem in root.xpath('//w:t', namespaces=namespaces):
270
+ if elem.text:
271
+ collected_text.append(elem.text.strip())
272
+
273
+ # Log the collected text snippets
274
+ MediLink_ConfigLoader.log("Collected text snippets: {}".format(collected_text), level="DEBUG")
275
+
276
+ combined_text = ' '.join(collected_text)
277
+ combined_text = reassemble_year(combined_text) # Fix OCR splitting years
278
+ combined_text = normalize_text(combined_text) # Normalize abbreviations
279
+ combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
280
+
281
+ # Log the combined text
282
+ MediLink_ConfigLoader.log("Combined text: {}".format(combined_text[:200]), level="DEBUG") # Log first 200 characters
283
+
284
+ day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
285
+ month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
286
+ year_pattern = r"\d{4}"
287
+
288
+ day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
289
+ month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
290
+ year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
291
+
292
+ MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
293
+ MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
294
+ MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
211
295
 
212
296
  if day_of_week and month_day and year_match:
213
297
  date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
214
298
  try:
215
299
  date_obj = datetime.strptime(date_str, '%A %B %d %Y')
216
- return date_obj.strftime('%m-%d-%Y')
300
+ extracted_date = date_obj.strftime('%m-%d-%Y')
301
+ MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
302
+ return extracted_date
217
303
  except ValueError as e:
218
304
  MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
219
305
  else:
220
- MediLink_ConfigLoader.log("Date components not found or incomplete in the text. Combined text: {}, Day of week: {}, Month and day: {}, Year: {}"
221
- .format(combined_text,
222
- day_of_week.group() if day_of_week else 'None',
223
- month_day.group() if month_day else 'None',
224
- year_match.group() if year_match else 'None'),
225
- level="WARNING")
306
+ MediLink_ConfigLoader.log(
307
+ "Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
308
+ combined_text,
309
+ day_of_week.group() if day_of_week else 'None',
310
+ month_day.group() if month_day else 'None',
311
+ year_match.group() if year_match else 'None'
312
+ ), level="WARNING"
313
+ )
314
+ except etree.XMLSyntaxError as e:
315
+ MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_content: {}".format(e), level="ERROR")
226
316
  except Exception as e:
227
- MediLink_ConfigLoader.log("Error extracting date from file: {}. Error: {}".format(file_path, e))
228
- print("Error extracting date from file: {}. Error: {}".format(file_path, e))
229
-
317
+ MediLink_ConfigLoader.log("Error extracting date from content: {}".format(e), level="ERROR")
318
+
230
319
  return None
231
320
 
321
+
322
+ def remove_directory(path):
323
+ if os.path.exists(path):
324
+ for root, dirs, files in os.walk(path, topdown=False):
325
+ for name in files:
326
+ try:
327
+ os.remove(os.path.join(root, name))
328
+ MediLink_ConfigLoader.log("Removed file: {}".format(os.path.join(root, name)), level="DEBUG")
329
+ except Exception as e:
330
+ MediLink_ConfigLoader.log("Error removing file {}: {}".format(os.path.join(root, name), e), level="ERROR")
331
+ for name in dirs:
332
+ try:
333
+ os.rmdir(os.path.join(root, name))
334
+ MediLink_ConfigLoader.log("Removed directory: {}".format(os.path.join(root, name)), level="DEBUG")
335
+ except Exception as e:
336
+ MediLink_ConfigLoader.log("Error removing directory {}: {}".format(os.path.join(root, name), e), level="ERROR")
337
+ try:
338
+ os.rmdir(path)
339
+ MediLink_ConfigLoader.log("Removed extraction root directory: {}".format(path), level="DEBUG")
340
+ except Exception as e:
341
+ MediLink_ConfigLoader.log("Error removing root directory {}: {}".format(path, e), level="ERROR")
342
+
343
+
344
+ def normalize_text(text):
345
+ month_map = {
346
+ 'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
347
+ 'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
348
+ 'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
349
+ }
350
+ day_map = {
351
+ 'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
352
+ 'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
353
+ }
354
+
355
+ for abbr, full in month_map.items():
356
+ text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
357
+ for abbr, full in day_map.items():
358
+ text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
359
+
360
+ return text
361
+
362
+
363
+ def reassemble_year(text):
364
+ # First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
365
+ text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
366
+ text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
367
+ text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
368
+
369
+ # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
370
+ parts = re.findall(r'\b(\d{1,2})\b', text)
371
+ if len(parts) >= 4:
372
+ for i in range(len(parts) - 3):
373
+ candidate = ''.join(parts[i:i + 4])
374
+ if len(candidate) == 4 and candidate.isdigit():
375
+ combined_year = candidate
376
+ pattern = r'\b' + r'\s+'.join(parts[i:i + 4]) + r'\b'
377
+ text = re.sub(pattern, combined_year, text)
378
+ break
379
+
380
+ return text
381
+
382
+
232
383
  def parse_patient_id(text):
233
384
  try:
234
385
  return text.split()[0].lstrip('#') # Extract patient ID number (removing the '#')
235
386
  except Exception as e:
236
- MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e))
387
+ MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e), level="ERROR")
237
388
  return None
238
389
 
390
+
239
391
  def parse_diagnosis_code(text):
240
392
  try:
241
393
  # Regular expression to find all ICD-10 codes starting with 'H' and containing a period
@@ -252,9 +404,10 @@ def parse_diagnosis_code(text):
252
404
  return text.split('/')[0]
253
405
 
254
406
  except Exception as e:
255
- MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e))
407
+ MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e), level="ERROR")
256
408
  return "Unknown"
257
409
 
410
+
258
411
  def parse_left_or_right_eye(text):
259
412
  try:
260
413
  if 'LEFT EYE' in text.upper():
@@ -264,9 +417,10 @@ def parse_left_or_right_eye(text):
264
417
  else:
265
418
  return 'Unknown'
266
419
  except Exception as e:
267
- MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e))
420
+ MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e), level="ERROR")
268
421
  return 'Unknown'
269
422
 
423
+
270
424
  def parse_femto_yes_or_no(text):
271
425
  try:
272
426
  if 'FEMTO' in text.upper():
@@ -274,9 +428,10 @@ def parse_femto_yes_or_no(text):
274
428
  else:
275
429
  return False
276
430
  except Exception as e:
277
- MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e))
431
+ MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e), level="ERROR")
278
432
  return False
279
433
 
434
+
280
435
  def rotate_docx_files(directory):
281
436
  # List all files in the directory
282
437
  files = os.listdir(directory)
@@ -294,10 +449,12 @@ def rotate_docx_files(directory):
294
449
  pprint.pprint(patient_data_dict)
295
450
  print()
296
451
 
452
+
297
453
  def main():
298
454
  # Call the function with the directory containing your .docx files
299
455
  directory = "C:\\Users\\danie\\Downloads\\"
300
456
  rotate_docx_files(directory)
301
457
 
458
+
302
459
  if __name__ == "__main__":
303
460
  main()