medicafe 0.240716.2__py3-none-any.whl → 0.240925.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of medicafe might be problematic. Click here for more details.
- MediBot/MediBot.bat +56 -16
- MediBot/MediBot.py +100 -78
- MediBot/MediBot_Crosswalk_Library.py +496 -194
- MediBot/MediBot_Preprocessor.py +22 -14
- MediBot/MediBot_Preprocessor_lib.py +301 -143
- MediBot/MediBot_UI.py +25 -24
- MediBot/MediBot_dataformat_library.py +17 -25
- MediBot/MediBot_docx_decoder.py +267 -110
- MediBot/update_json.py +26 -1
- MediBot/update_medicafe.py +134 -44
- MediLink/MediLink.py +95 -53
- MediLink/MediLink_837p_encoder.py +83 -66
- MediLink/MediLink_837p_encoder_library.py +159 -102
- MediLink/MediLink_API_Generator.py +1 -7
- MediLink/MediLink_API_v3.py +348 -63
- MediLink/MediLink_APIs.py +1 -2
- MediLink/MediLink_ClaimStatus.py +21 -6
- MediLink/MediLink_ConfigLoader.py +9 -9
- MediLink/MediLink_DataMgmt.py +321 -100
- MediLink/MediLink_Decoder.py +249 -87
- MediLink/MediLink_Deductible.py +62 -56
- MediLink/MediLink_Down.py +115 -121
- MediLink/MediLink_Gmail.py +2 -11
- MediLink/MediLink_Parser.py +63 -36
- MediLink/MediLink_UI.py +36 -23
- MediLink/MediLink_Up.py +188 -115
- {medicafe-0.240716.2.dist-info → medicafe-0.240925.9.dist-info}/METADATA +1 -1
- medicafe-0.240925.9.dist-info/RECORD +47 -0
- medicafe-0.240716.2.dist-info/RECORD +0 -47
- {medicafe-0.240716.2.dist-info → medicafe-0.240925.9.dist-info}/LICENSE +0 -0
- {medicafe-0.240716.2.dist-info → medicafe-0.240925.9.dist-info}/WHEEL +0 -0
- {medicafe-0.240716.2.dist-info → medicafe-0.240925.9.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#MediBot_dataformat_library.py
|
|
1
2
|
import re
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
import re #for addresses
|
|
@@ -159,31 +160,22 @@ def format_zip(value):
|
|
|
159
160
|
return value_str[:5]
|
|
160
161
|
|
|
161
162
|
def format_data(medisoft_field, value, csv_data, reverse_mapping, parsed_address_components):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
formatted_value = format_policy(value)
|
|
178
|
-
elif medisoft_field == 'Secondary Policy Number':
|
|
179
|
-
formatted_value = format_policy(value)
|
|
180
|
-
elif medisoft_field == 'Primary Group Number':
|
|
181
|
-
formatted_value = format_policy(value)
|
|
182
|
-
elif medisoft_field == 'Secondary Group Number':
|
|
183
|
-
formatted_value = format_policy(value)
|
|
184
|
-
else:
|
|
185
|
-
formatted_value = str(value) # Ensure value is always a string
|
|
186
|
-
|
|
163
|
+
formatters = {
|
|
164
|
+
'Patient Name': format_name,
|
|
165
|
+
'Birth Date': format_date,
|
|
166
|
+
'Phone': format_phone,
|
|
167
|
+
'Phone #2': format_phone,
|
|
168
|
+
'Gender': format_gender,
|
|
169
|
+
'Street': lambda v: format_street(v, csv_data, reverse_mapping, parsed_address_components),
|
|
170
|
+
'Zip Code': format_zip,
|
|
171
|
+
'Primary Policy Number': format_policy,
|
|
172
|
+
'Secondary Policy Number': format_policy,
|
|
173
|
+
'Primary Group Number': format_policy,
|
|
174
|
+
'Secondary Group Number': format_policy
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
formatted_value = formatters.get(medisoft_field, str)(value) # Default to str if not found
|
|
187
178
|
formatted_value = formatted_value.replace(',', '{,}').replace(' ', '{Space}')
|
|
179
|
+
|
|
188
180
|
ahk_command = 'SendInput, {}{{Enter}}'.format(formatted_value)
|
|
189
181
|
return ahk_command
|
MediBot/MediBot_docx_decoder.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
import re
|
|
3
|
-
from lxml import etree
|
|
4
|
-
import zipfile
|
|
1
|
+
#MediBot_docx_decoder.py
|
|
5
2
|
from datetime import datetime
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
3
|
from collections import OrderedDict
|
|
4
|
+
import os, re, sys, zipfile
|
|
5
|
+
from docx import Document
|
|
6
|
+
from lxml import etree
|
|
9
7
|
|
|
10
8
|
# Add parent directory of the project to the Python path
|
|
11
9
|
project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
@@ -16,11 +14,12 @@ try:
|
|
|
16
14
|
except ImportError:
|
|
17
15
|
from MediLink import MediLink_ConfigLoader
|
|
18
16
|
|
|
19
|
-
|
|
17
|
+
|
|
18
|
+
def parse_docx(filepath, surgery_dates): # Accept surgery_dates as a parameter
|
|
20
19
|
try:
|
|
21
20
|
doc = Document(filepath) # Open the .docx file
|
|
22
21
|
except Exception as e:
|
|
23
|
-
MediLink_ConfigLoader.log("Error opening document: {}".format(e)) # Log error
|
|
22
|
+
MediLink_ConfigLoader.log("Error opening document: {}".format(e), level="ERROR") # Log error
|
|
24
23
|
return {}
|
|
25
24
|
|
|
26
25
|
patient_data = OrderedDict() # Initialize OrderedDict to store data
|
|
@@ -29,6 +28,18 @@ def parse_docx(filepath):
|
|
|
29
28
|
date_of_service = extract_date_of_service(filepath) # Extract date of service
|
|
30
29
|
MediLink_ConfigLoader.log("Date of Service recorded as: {}".format(date_of_service), level="DEBUG")
|
|
31
30
|
|
|
31
|
+
# Convert date_of_service to match the format of surgery_dates
|
|
32
|
+
date_of_service = datetime.strptime(date_of_service, '%m-%d-%Y') # Convert to datetime object
|
|
33
|
+
# Check if the date_of_service is in the passed surgery_dates
|
|
34
|
+
if date_of_service not in surgery_dates: # Direct comparison with datetime objects
|
|
35
|
+
MediLink_ConfigLoader.log("Date of Service {} not found in provided surgery dates. Skipping document.".format(date_of_service), level="DEBUG")
|
|
36
|
+
return {} # Early exit if date is not found
|
|
37
|
+
|
|
38
|
+
MediLink_ConfigLoader.log("Date of Service {} found in surgery dates. Proceeding with parsing of the document.".format(date_of_service), level="DEBUG") # Log that date of service was found
|
|
39
|
+
# Convert back to MM-DD-YYYY format.
|
|
40
|
+
# TODO in the future, maybe just do the treatment to surgery_dates, no need to convert back and forth..
|
|
41
|
+
date_of_service = date_of_service.strftime('%m-%d-%Y')
|
|
42
|
+
|
|
32
43
|
for table in doc.tables: # Iterate over tables in the document
|
|
33
44
|
for row in table.rows:
|
|
34
45
|
cells = [cell.text.strip() for cell in row.cells]
|
|
@@ -43,11 +54,11 @@ def parse_docx(filepath):
|
|
|
43
54
|
patient_data[patient_id] = {}
|
|
44
55
|
|
|
45
56
|
if date_of_service in patient_data[patient_id]:
|
|
46
|
-
MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service))
|
|
57
|
+
MediLink_ConfigLoader.log("Duplicate entry for patient ID {} on date {}. Skipping.".format(patient_id, date_of_service), level="WARNING")
|
|
47
58
|
else:
|
|
48
59
|
patient_data[patient_id][date_of_service] = [diagnosis_code, left_or_right_eye, femto_yes_or_no]
|
|
49
60
|
except Exception as e:
|
|
50
|
-
MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e))
|
|
61
|
+
MediLink_ConfigLoader.log("Error processing row: {}. Error: {}".format(cells, e), level="ERROR")
|
|
51
62
|
|
|
52
63
|
# Validation steps
|
|
53
64
|
validate_unknown_entries(patient_data)
|
|
@@ -55,17 +66,18 @@ def parse_docx(filepath):
|
|
|
55
66
|
|
|
56
67
|
return patient_data
|
|
57
68
|
|
|
69
|
+
|
|
58
70
|
def validate_unknown_entries(patient_data):
|
|
59
71
|
for patient_id, dates in list(patient_data.items()):
|
|
60
72
|
for date, details in list(dates.items()):
|
|
61
73
|
if 'Unknown' in details:
|
|
62
74
|
warning_message = "Warning: 'Unknown' entry found. Patient ID: {}, Date: {}, Details: {}".format(patient_id, date, details)
|
|
63
75
|
MediLink_ConfigLoader.log(warning_message, level="WARNING")
|
|
64
|
-
print(warning_message)
|
|
65
76
|
del patient_data[patient_id][date]
|
|
66
77
|
if not patient_data[patient_id]: # If no dates left for the patient, remove the patient
|
|
67
78
|
del patient_data[patient_id]
|
|
68
79
|
|
|
80
|
+
|
|
69
81
|
def validate_diagnostic_code(patient_data):
|
|
70
82
|
for patient_id, dates in patient_data.items():
|
|
71
83
|
for date, details in dates.items():
|
|
@@ -76,125 +88,133 @@ def validate_diagnostic_code(patient_data):
|
|
|
76
88
|
elif eye == 'Right' and not diagnostic_code.endswith('1'):
|
|
77
89
|
log_and_warn(patient_id, date, diagnostic_code, eye)
|
|
78
90
|
|
|
91
|
+
|
|
79
92
|
def log_and_warn(patient_id, date, diagnostic_code, eye):
|
|
80
93
|
warning_message = (
|
|
81
94
|
"Warning: Mismatch found for Patient ID: {}, Date: {}, "
|
|
82
95
|
"Diagnostic Code: {}, Eye: {}".format(patient_id, date, diagnostic_code, eye)
|
|
83
96
|
)
|
|
84
97
|
MediLink_ConfigLoader.log(warning_message, level="WARNING")
|
|
85
|
-
print(warning_message)
|
|
86
98
|
|
|
87
|
-
|
|
88
|
-
def extract_date_of_service(docx_path):
|
|
89
|
-
extract_to = "
|
|
99
|
+
|
|
100
|
+
def extract_date_of_service(docx_path, use_in_memory=True):
|
|
101
|
+
extract_to = "extracted_docx_debug"
|
|
102
|
+
in_memory_result = None
|
|
103
|
+
directory_based_result = None
|
|
104
|
+
|
|
105
|
+
# Log the selected approach
|
|
106
|
+
if use_in_memory:
|
|
107
|
+
MediLink_ConfigLoader.log("Using In-Memory extraction approach for Surgery Schedule.", level="INFO")
|
|
108
|
+
else:
|
|
109
|
+
MediLink_ConfigLoader.log("Using Directory-Based extraction approach for Surgery Schedule.", level="INFO")
|
|
110
|
+
|
|
111
|
+
# Directory-Based Extraction
|
|
112
|
+
if not use_in_memory: # Only perform directory-based extraction if in-memory is not selected
|
|
113
|
+
try:
|
|
114
|
+
if not os.path.exists(extract_to):
|
|
115
|
+
os.makedirs(extract_to)
|
|
116
|
+
MediLink_ConfigLoader.log("Created extraction directory: {}".format(extract_to), level="DEBUG")
|
|
117
|
+
|
|
118
|
+
with zipfile.ZipFile(docx_path, 'r') as docx:
|
|
119
|
+
MediLink_ConfigLoader.log("Opened DOCX file: {}".format(docx_path), level="DEBUG")
|
|
120
|
+
docx.extractall(extract_to)
|
|
121
|
+
MediLink_ConfigLoader.log("Extracted DOCX to: {}".format(extract_to), level="DEBUG")
|
|
122
|
+
|
|
123
|
+
file_path = find_text_in_xml(extract_to, "Surgery Schedule")
|
|
124
|
+
if file_path:
|
|
125
|
+
MediLink_ConfigLoader.log("Found XML file with target text: {}".format(file_path), level="DEBUG")
|
|
126
|
+
directory_based_result = extract_date_from_file(file_path)
|
|
127
|
+
MediLink_ConfigLoader.log("Directory-Based Extraction Result: {}".format(directory_based_result), level="DEBUG")
|
|
128
|
+
else:
|
|
129
|
+
MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
|
|
130
|
+
except zipfile.BadZipFile as e:
|
|
131
|
+
MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
|
|
132
|
+
except Exception as e:
|
|
133
|
+
MediLink_ConfigLoader.log("Error opening DOCX file {}: {}".format(docx_path, e), level="ERROR")
|
|
134
|
+
|
|
135
|
+
# In-Memory Extraction // Single-Pass Processing is typically more efficient in terms of both time and memory compared to list creation for header isolation.
|
|
136
|
+
if use_in_memory: # Only perform in-memory extraction if selected
|
|
137
|
+
try:
|
|
138
|
+
with zipfile.ZipFile(docx_path, 'r') as docx:
|
|
139
|
+
MediLink_ConfigLoader.log("Opened DOCX file for In-Memory extraction: {}".format(docx_path), level="DEBUG")
|
|
140
|
+
for file_info in docx.infolist():
|
|
141
|
+
if file_info.filename.endswith('.xml'):
|
|
142
|
+
MediLink_ConfigLoader.log("Processing XML file in-memory: {}".format(file_info.filename), level="DEBUG")
|
|
143
|
+
with docx.open(file_info) as file:
|
|
144
|
+
try:
|
|
145
|
+
xml_content = file.read() # Read the entire XML content
|
|
146
|
+
MediLink_ConfigLoader.log("Read XML content from {}".format(file_info.filename), level="DEBUG")
|
|
147
|
+
if "Surgery Schedule" in xml_content.decode('utf-8', errors='ignore'):
|
|
148
|
+
MediLink_ConfigLoader.log("Found 'Surgery Schedule' in file: {}".format(file_info.filename), level="DEBUG")
|
|
149
|
+
in_memory_result = extract_date_from_content(xml_content)
|
|
150
|
+
MediLink_ConfigLoader.log("In-Memory Extraction Result from {}: {}".format(file_info.filename, in_memory_result), level="DEBUG")
|
|
151
|
+
break # Stop after finding the first relevant file
|
|
152
|
+
except Exception as e:
|
|
153
|
+
MediLink_ConfigLoader.log("Error parsing XML file {} (In-Memory): {}".format(file_info.filename, e), level="ERROR")
|
|
154
|
+
|
|
155
|
+
if in_memory_result is None:
|
|
156
|
+
MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files (In-Memory).", level="WARNING")
|
|
157
|
+
except zipfile.BadZipFile as e:
|
|
158
|
+
MediLink_ConfigLoader.log("BadZipFile Error opening DOCX file for In-Memory extraction {}: {}".format(docx_path, e), level="ERROR")
|
|
159
|
+
except Exception as e:
|
|
160
|
+
MediLink_ConfigLoader.log("Error during In-Memory extraction of DOCX file {}: {}".format(docx_path, e), level="ERROR")
|
|
161
|
+
|
|
162
|
+
# Clean up the extracted directory if it exists
|
|
90
163
|
try:
|
|
91
|
-
if
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
file_path = find_text_in_xml(extract_to, "Surgery Schedule")
|
|
98
|
-
if file_path:
|
|
99
|
-
return extract_date_from_file(file_path)
|
|
100
|
-
else:
|
|
101
|
-
MediLink_ConfigLoader.log("Target text 'Surgery Schedule' not found in any XML files.", level="WARNING")
|
|
102
|
-
return None
|
|
103
|
-
finally:
|
|
104
|
-
# Clean up the extracted files
|
|
105
|
-
remove_directory(extract_to)
|
|
106
|
-
MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
|
|
164
|
+
if os.path.exists(extract_to):
|
|
165
|
+
remove_directory(extract_to)
|
|
166
|
+
MediLink_ConfigLoader.log("Cleaned up extracted files in: {}".format(extract_to), level="DEBUG")
|
|
167
|
+
except Exception as e:
|
|
168
|
+
MediLink_ConfigLoader.log("Error cleaning up extraction directory {}: {}".format(extract_to, e), level="ERROR")
|
|
107
169
|
|
|
108
|
-
|
|
109
|
-
if
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
os.rmdir(path)
|
|
170
|
+
# Decide which result to return (prefer in-memory if available)
|
|
171
|
+
if in_memory_result:
|
|
172
|
+
return in_memory_result
|
|
173
|
+
elif directory_based_result:
|
|
174
|
+
return directory_based_result
|
|
175
|
+
else:
|
|
176
|
+
return None
|
|
116
177
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
for root_dir, dirs, files in os.walk(
|
|
178
|
+
def find_text_in_xml(extract_dir, target_text):
|
|
179
|
+
target_pattern = re.compile(re.escape(target_text), re.IGNORECASE)
|
|
180
|
+
for root_dir, dirs, files in os.walk(extract_dir):
|
|
120
181
|
for file in files:
|
|
121
|
-
if file.endswith('.xml'):
|
|
182
|
+
if file.endswith('.xml') and file != '[Content_Types].xml': # Skip Content_Types.xml
|
|
122
183
|
file_path = os.path.join(root_dir, file)
|
|
123
184
|
try:
|
|
124
185
|
tree = etree.parse(file_path)
|
|
125
186
|
root = tree.getroot()
|
|
126
|
-
namespaces =
|
|
187
|
+
namespaces = root.nsmap
|
|
188
|
+
MediLink_ConfigLoader.log("Processing file: {}".format(file_path), level="DEBUG")
|
|
127
189
|
for elem in root.xpath('//w:t', namespaces=namespaces):
|
|
128
|
-
if elem.text and
|
|
129
|
-
MediLink_ConfigLoader.log("Found target text in file: {}".format(file_path), level="DEBUG")
|
|
190
|
+
if elem.text and target_pattern.search(elem.text):
|
|
191
|
+
MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
|
|
130
192
|
return file_path
|
|
193
|
+
except etree.XMLSyntaxError as e:
|
|
194
|
+
MediLink_ConfigLoader.log("XMLSyntaxError parsing file {}: {}".format(file_path, e), level="ERROR")
|
|
131
195
|
except Exception as e:
|
|
132
|
-
MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e))
|
|
133
|
-
|
|
196
|
+
MediLink_ConfigLoader.log("Error parsing XML file {}: {}".format(file_path, e), level="ERROR")
|
|
197
|
+
MediLink_ConfigLoader.log("Target text '{}' not found in any XML files within directory: {}".format(target_text, extract_dir), level="WARNING")
|
|
134
198
|
return None
|
|
135
199
|
|
|
136
|
-
# Normalize month and day abbreviations
|
|
137
|
-
def normalize_text(text):
|
|
138
|
-
month_map = {
|
|
139
|
-
'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
|
|
140
|
-
'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
|
|
141
|
-
'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
|
|
142
|
-
}
|
|
143
|
-
day_map = {
|
|
144
|
-
'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
|
|
145
|
-
'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
for abbr, full in month_map.items():
|
|
149
|
-
text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
|
|
150
|
-
for abbr, full in day_map.items():
|
|
151
|
-
text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
|
|
152
|
-
|
|
153
|
-
return text
|
|
154
|
-
|
|
155
|
-
def reassemble_year(text):
|
|
156
|
-
# First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
|
|
157
|
-
text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
|
|
158
|
-
text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
|
|
159
|
-
text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
|
|
160
|
-
|
|
161
|
-
# Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
|
|
162
|
-
parts = re.findall(r'\b(\d{1,2})\b', text)
|
|
163
|
-
if len(parts) >= 4:
|
|
164
|
-
for i in range(len(parts) - 3):
|
|
165
|
-
candidate = ''.join(parts[i:i + 4])
|
|
166
|
-
if len(candidate) == 4 and candidate.isdigit():
|
|
167
|
-
combined_year = candidate
|
|
168
|
-
text = re.sub(r'\b' + r'\b \b'.join(parts[i:i + 4]) + r'\b', combined_year, text)
|
|
169
|
-
break
|
|
170
|
-
|
|
171
|
-
return text
|
|
172
|
-
|
|
173
|
-
# Extract and parse the date from the file
|
|
174
200
|
def extract_date_from_file(file_path):
|
|
175
201
|
try:
|
|
176
202
|
tree = etree.parse(file_path)
|
|
177
203
|
root = tree.getroot()
|
|
178
204
|
collected_text = []
|
|
179
205
|
|
|
180
|
-
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
|
|
206
|
+
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} # Hardcoded for XP handling BUG
|
|
181
207
|
for elem in root.xpath('//w:t', namespaces=namespaces):
|
|
182
208
|
if elem.text:
|
|
183
209
|
collected_text.append(elem.text.strip())
|
|
184
210
|
|
|
185
|
-
for elem in root.iter():
|
|
186
|
-
if elem.tag.endswith('t') and elem.text:
|
|
187
|
-
collected_text.append(elem.text.strip())
|
|
188
|
-
|
|
189
211
|
combined_text = ' '.join(collected_text)
|
|
190
|
-
combined_text = reassemble_year(combined_text)
|
|
191
|
-
# combined_text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', combined_text) # initial year regex.
|
|
212
|
+
combined_text = reassemble_year(combined_text) # Fix OCR splitting years
|
|
192
213
|
combined_text = normalize_text(combined_text) # Normalize abbreviations
|
|
193
214
|
combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
|
|
194
215
|
|
|
195
216
|
# Log the combined text
|
|
196
|
-
MediLink_ConfigLoader.log("Combined text: {}".format(combined_text), level="DEBUG")
|
|
197
|
-
# print("DEBUG: Combined text: {}".format(combined_text))
|
|
217
|
+
MediLink_ConfigLoader.log("Combined text from file '{}': {}".format(file_path, combined_text[:200]), level="DEBUG")
|
|
198
218
|
|
|
199
219
|
day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
|
|
200
220
|
month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
|
|
@@ -203,39 +223,171 @@ def extract_date_from_file(file_path):
|
|
|
203
223
|
day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
|
|
204
224
|
month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
|
|
205
225
|
year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
|
|
206
|
-
|
|
226
|
+
|
|
207
227
|
# Log the results of the regex searches
|
|
208
228
|
MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
|
|
209
229
|
MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
|
|
210
230
|
MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
|
|
231
|
+
|
|
232
|
+
if day_of_week and month_day and year_match:
|
|
233
|
+
date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
|
|
234
|
+
try:
|
|
235
|
+
date_obj = datetime.strptime(date_str, '%A %B %d %Y')
|
|
236
|
+
extracted_date = date_obj.strftime('%m-%d-%Y')
|
|
237
|
+
MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
|
|
238
|
+
return extracted_date
|
|
239
|
+
except ValueError as e:
|
|
240
|
+
MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
|
|
241
|
+
else:
|
|
242
|
+
MediLink_ConfigLoader.log(
|
|
243
|
+
"Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
|
|
244
|
+
combined_text,
|
|
245
|
+
day_of_week.group() if day_of_week else 'None',
|
|
246
|
+
month_day.group() if month_day else 'None',
|
|
247
|
+
year_match.group() if year_match else 'None'
|
|
248
|
+
), level="WARNING"
|
|
249
|
+
)
|
|
250
|
+
except etree.XMLSyntaxError as e:
|
|
251
|
+
MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_file '{}': {}".format(file_path, e), level="ERROR")
|
|
252
|
+
except Exception as e:
|
|
253
|
+
MediLink_ConfigLoader.log("Error extracting date from file '{}': {}".format(file_path, e), level="ERROR")
|
|
254
|
+
|
|
255
|
+
return None
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def extract_date_from_content(xml_content):
|
|
259
|
+
try:
|
|
260
|
+
# Parse the XML content into an ElementTree
|
|
261
|
+
tree = etree.fromstring(xml_content)
|
|
262
|
+
root = tree # root is already the root element in this case
|
|
263
|
+
collected_text = []
|
|
264
|
+
|
|
265
|
+
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
|
|
266
|
+
MediLink_ConfigLoader.log("Using namespaces: {}".format(namespaces), level="DEBUG")
|
|
267
|
+
|
|
268
|
+
# Extract text from all <w:t> elements
|
|
269
|
+
for elem in root.xpath('//w:t', namespaces=namespaces):
|
|
270
|
+
if elem.text:
|
|
271
|
+
collected_text.append(elem.text.strip())
|
|
272
|
+
|
|
273
|
+
# Log the collected text snippets
|
|
274
|
+
MediLink_ConfigLoader.log("Collected text snippets: {}".format(collected_text), level="DEBUG")
|
|
275
|
+
|
|
276
|
+
combined_text = ' '.join(collected_text)
|
|
277
|
+
combined_text = reassemble_year(combined_text) # Fix OCR splitting years
|
|
278
|
+
combined_text = normalize_text(combined_text) # Normalize abbreviations
|
|
279
|
+
combined_text = re.sub(r',', '', combined_text) # Remove commas if they exist
|
|
280
|
+
|
|
281
|
+
# Log the combined text
|
|
282
|
+
MediLink_ConfigLoader.log("Combined text: {}".format(combined_text[:200]), level="DEBUG") # Log first 200 characters
|
|
283
|
+
|
|
284
|
+
day_week_pattern = r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)"
|
|
285
|
+
month_day_pattern = r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}"
|
|
286
|
+
year_pattern = r"\d{4}"
|
|
287
|
+
|
|
288
|
+
day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
|
|
289
|
+
month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
|
|
290
|
+
year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
|
|
291
|
+
|
|
292
|
+
MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
|
|
293
|
+
MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")
|
|
294
|
+
MediLink_ConfigLoader.log("Year found: {}".format(year_match.group() if year_match else 'None'), level="DEBUG")
|
|
211
295
|
|
|
212
296
|
if day_of_week and month_day and year_match:
|
|
213
297
|
date_str = "{} {} {}".format(day_of_week.group(), month_day.group(), year_match.group())
|
|
214
298
|
try:
|
|
215
299
|
date_obj = datetime.strptime(date_str, '%A %B %d %Y')
|
|
216
|
-
|
|
300
|
+
extracted_date = date_obj.strftime('%m-%d-%Y')
|
|
301
|
+
MediLink_ConfigLoader.log("Extracted date: {}".format(extracted_date), level="DEBUG")
|
|
302
|
+
return extracted_date
|
|
217
303
|
except ValueError as e:
|
|
218
304
|
MediLink_ConfigLoader.log("Error converting date: {}. Error: {}".format(date_str, e), level="ERROR")
|
|
219
305
|
else:
|
|
220
|
-
MediLink_ConfigLoader.log(
|
|
221
|
-
.format(
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
306
|
+
MediLink_ConfigLoader.log(
|
|
307
|
+
"Date components not found or incomplete. Combined text: '{}', Day of week: {}, Month and day: {}, Year: {}".format(
|
|
308
|
+
combined_text,
|
|
309
|
+
day_of_week.group() if day_of_week else 'None',
|
|
310
|
+
month_day.group() if month_day else 'None',
|
|
311
|
+
year_match.group() if year_match else 'None'
|
|
312
|
+
), level="WARNING"
|
|
313
|
+
)
|
|
314
|
+
except etree.XMLSyntaxError as e:
|
|
315
|
+
MediLink_ConfigLoader.log("XMLSyntaxError in extract_date_from_content: {}".format(e), level="ERROR")
|
|
226
316
|
except Exception as e:
|
|
227
|
-
MediLink_ConfigLoader.log("Error extracting date from
|
|
228
|
-
|
|
229
|
-
|
|
317
|
+
MediLink_ConfigLoader.log("Error extracting date from content: {}".format(e), level="ERROR")
|
|
318
|
+
|
|
230
319
|
return None
|
|
231
320
|
|
|
321
|
+
|
|
322
|
+
def remove_directory(path):
|
|
323
|
+
if os.path.exists(path):
|
|
324
|
+
for root, dirs, files in os.walk(path, topdown=False):
|
|
325
|
+
for name in files:
|
|
326
|
+
try:
|
|
327
|
+
os.remove(os.path.join(root, name))
|
|
328
|
+
MediLink_ConfigLoader.log("Removed file: {}".format(os.path.join(root, name)), level="DEBUG")
|
|
329
|
+
except Exception as e:
|
|
330
|
+
MediLink_ConfigLoader.log("Error removing file {}: {}".format(os.path.join(root, name), e), level="ERROR")
|
|
331
|
+
for name in dirs:
|
|
332
|
+
try:
|
|
333
|
+
os.rmdir(os.path.join(root, name))
|
|
334
|
+
MediLink_ConfigLoader.log("Removed directory: {}".format(os.path.join(root, name)), level="DEBUG")
|
|
335
|
+
except Exception as e:
|
|
336
|
+
MediLink_ConfigLoader.log("Error removing directory {}: {}".format(os.path.join(root, name), e), level="ERROR")
|
|
337
|
+
try:
|
|
338
|
+
os.rmdir(path)
|
|
339
|
+
MediLink_ConfigLoader.log("Removed extraction root directory: {}".format(path), level="DEBUG")
|
|
340
|
+
except Exception as e:
|
|
341
|
+
MediLink_ConfigLoader.log("Error removing root directory {}: {}".format(path, e), level="ERROR")
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def normalize_text(text):
|
|
345
|
+
month_map = {
|
|
346
|
+
'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
|
|
347
|
+
'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
|
|
348
|
+
'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
|
|
349
|
+
}
|
|
350
|
+
day_map = {
|
|
351
|
+
'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
|
|
352
|
+
'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
for abbr, full in month_map.items():
|
|
356
|
+
text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
|
|
357
|
+
for abbr, full in day_map.items():
|
|
358
|
+
text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
|
|
359
|
+
|
|
360
|
+
return text
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def reassemble_year(text):
|
|
364
|
+
# First, handle the most common case where a 4-digit year is split as (3,1), (1,3), or (2,2)
|
|
365
|
+
text = re.sub(r'(\d{3}) (\d{1})', r'\1\2', text)
|
|
366
|
+
text = re.sub(r'(\d{1}) (\d{3})', r'\1\2', text)
|
|
367
|
+
text = re.sub(r'(\d{2}) (\d{2})', r'\1\2', text)
|
|
368
|
+
|
|
369
|
+
# Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
|
|
370
|
+
parts = re.findall(r'\b(\d{1,2})\b', text)
|
|
371
|
+
if len(parts) >= 4:
|
|
372
|
+
for i in range(len(parts) - 3):
|
|
373
|
+
candidate = ''.join(parts[i:i + 4])
|
|
374
|
+
if len(candidate) == 4 and candidate.isdigit():
|
|
375
|
+
combined_year = candidate
|
|
376
|
+
pattern = r'\b' + r'\s+'.join(parts[i:i + 4]) + r'\b'
|
|
377
|
+
text = re.sub(pattern, combined_year, text)
|
|
378
|
+
break
|
|
379
|
+
|
|
380
|
+
return text
|
|
381
|
+
|
|
382
|
+
|
|
232
383
|
def parse_patient_id(text):
|
|
233
384
|
try:
|
|
234
385
|
return text.split()[0].lstrip('#') # Extract patient ID number (removing the '#')
|
|
235
386
|
except Exception as e:
|
|
236
|
-
MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e))
|
|
387
|
+
MediLink_ConfigLoader.log("Error parsing patient ID: {}. Error: {}".format(text, e), level="ERROR")
|
|
237
388
|
return None
|
|
238
389
|
|
|
390
|
+
|
|
239
391
|
def parse_diagnosis_code(text):
|
|
240
392
|
try:
|
|
241
393
|
# Regular expression to find all ICD-10 codes starting with 'H' and containing a period
|
|
@@ -252,9 +404,10 @@ def parse_diagnosis_code(text):
|
|
|
252
404
|
return text.split('/')[0]
|
|
253
405
|
|
|
254
406
|
except Exception as e:
|
|
255
|
-
MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e))
|
|
407
|
+
MediLink_ConfigLoader.log("Error parsing diagnosis code: {}. Error: {}".format(text, e), level="ERROR")
|
|
256
408
|
return "Unknown"
|
|
257
409
|
|
|
410
|
+
|
|
258
411
|
def parse_left_or_right_eye(text):
|
|
259
412
|
try:
|
|
260
413
|
if 'LEFT EYE' in text.upper():
|
|
@@ -264,9 +417,10 @@ def parse_left_or_right_eye(text):
|
|
|
264
417
|
else:
|
|
265
418
|
return 'Unknown'
|
|
266
419
|
except Exception as e:
|
|
267
|
-
MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e))
|
|
420
|
+
MediLink_ConfigLoader.log("Error parsing left or right eye: {}. Error: {}".format(text, e), level="ERROR")
|
|
268
421
|
return 'Unknown'
|
|
269
422
|
|
|
423
|
+
|
|
270
424
|
def parse_femto_yes_or_no(text):
|
|
271
425
|
try:
|
|
272
426
|
if 'FEMTO' in text.upper():
|
|
@@ -274,9 +428,10 @@ def parse_femto_yes_or_no(text):
|
|
|
274
428
|
else:
|
|
275
429
|
return False
|
|
276
430
|
except Exception as e:
|
|
277
|
-
MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e))
|
|
431
|
+
MediLink_ConfigLoader.log("Error parsing femto yes or no: {}. Error: {}".format(text, e), level="ERROR")
|
|
278
432
|
return False
|
|
279
433
|
|
|
434
|
+
|
|
280
435
|
def rotate_docx_files(directory):
|
|
281
436
|
# List all files in the directory
|
|
282
437
|
files = os.listdir(directory)
|
|
@@ -294,10 +449,12 @@ def rotate_docx_files(directory):
|
|
|
294
449
|
pprint.pprint(patient_data_dict)
|
|
295
450
|
print()
|
|
296
451
|
|
|
452
|
+
|
|
297
453
|
def main():
|
|
298
454
|
# Call the function with the directory containing your .docx files
|
|
299
455
|
directory = "C:\\Users\\danie\\Downloads\\"
|
|
300
456
|
rotate_docx_files(directory)
|
|
301
457
|
|
|
458
|
+
|
|
302
459
|
if __name__ == "__main__":
|
|
303
460
|
main()
|
MediBot/update_json.py
CHANGED
|
@@ -1,7 +1,25 @@
|
|
|
1
|
+
# update_json.py
|
|
1
2
|
import json
|
|
2
3
|
import sys
|
|
3
4
|
from collections import OrderedDict
|
|
4
5
|
|
|
6
|
+
def get_current_csv_path(json_file):
|
|
7
|
+
try:
|
|
8
|
+
with open(json_file, 'r', encoding='utf-8') as file:
|
|
9
|
+
try:
|
|
10
|
+
data = json.load(file, object_pairs_hook=OrderedDict)
|
|
11
|
+
return data.get('CSV_FILE_PATH', None)
|
|
12
|
+
except ValueError as decode_err:
|
|
13
|
+
print("Error decoding JSON file '{}': {}".format(json_file, decode_err))
|
|
14
|
+
sys.exit(1)
|
|
15
|
+
except IOError as io_err:
|
|
16
|
+
print("Error accessing file '{}': {}".format(json_file, io_err))
|
|
17
|
+
sys.exit(1)
|
|
18
|
+
except Exception as e:
|
|
19
|
+
print("An unexpected error occurred: {}".format(e))
|
|
20
|
+
sys.exit(1)
|
|
21
|
+
return None
|
|
22
|
+
|
|
5
23
|
def update_csv_path(json_file, new_path):
|
|
6
24
|
try:
|
|
7
25
|
with open(json_file, 'r', encoding='utf-8') as file:
|
|
@@ -38,6 +56,13 @@ if __name__ == "__main__":
|
|
|
38
56
|
json_path = sys.argv[1]
|
|
39
57
|
new_csv_path = sys.argv[2]
|
|
40
58
|
update_csv_path(json_path, new_csv_path)
|
|
59
|
+
elif len(sys.argv) == 2:
|
|
60
|
+
json_path = sys.argv[1]
|
|
61
|
+
current_csv_path = get_current_csv_path(json_path)
|
|
62
|
+
if current_csv_path:
|
|
63
|
+
print(current_csv_path)
|
|
64
|
+
else:
|
|
65
|
+
print("No CSV path found in config.")
|
|
41
66
|
else:
|
|
42
|
-
print("Usage: update_json.py <path_to_json_file> <new_csv_path>")
|
|
67
|
+
print("Usage: update_json.py <path_to_json_file> [<new_csv_path>]")
|
|
43
68
|
sys.exit(1)
|