medicafe 0.240415.1__py3-none-any.whl → 0.240517.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of medicafe might be problematic. Click here for more details.
- MediBot/MediBot.bat +198 -0
- MediBot/MediBot.py +346 -0
- MediBot/MediBot_Charges.py +28 -0
- MediBot/MediBot_Crosswalk_Library.py +280 -0
- MediBot/MediBot_Preprocessor.py +247 -0
- MediBot/MediBot_Preprocessor_lib.py +357 -0
- MediBot/MediBot_UI.py +240 -0
- MediBot/MediBot_dataformat_library.py +198 -0
- MediBot/MediBot_docx_decoder.py +80 -0
- MediBot/MediPost.py +5 -0
- MediBot/PDF_to_CSV_Cleaner.py +211 -0
- MediBot/__init__.py +0 -0
- MediBot/update_json.py +43 -0
- MediBot/update_medicafe.py +57 -0
- MediLink/MediLink.py +381 -0
- MediLink/MediLink_277_decoder.py +92 -0
- MediLink/MediLink_837p_encoder.py +502 -0
- MediLink/MediLink_837p_encoder_library.py +890 -0
- MediLink/MediLink_API_v2.py +174 -0
- MediLink/MediLink_APIs.py +137 -0
- MediLink/MediLink_ConfigLoader.py +81 -0
- MediLink/MediLink_DataMgmt.py +258 -0
- MediLink/MediLink_Down.py +128 -0
- MediLink/MediLink_ERA_decoder.py +192 -0
- MediLink/MediLink_Gmail.py +100 -0
- MediLink/MediLink_Mailer.py +7 -0
- MediLink/MediLink_Scheduler.py +173 -0
- MediLink/MediLink_StatusCheck.py +4 -0
- MediLink/MediLink_UI.py +118 -0
- MediLink/MediLink_Up.py +383 -0
- MediLink/MediLink_batch.bat +7 -0
- MediLink/Soumit_api.py +22 -0
- MediLink/__init__.py +0 -0
- MediLink/test.py +74 -0
- medicafe-0.240517.0.dist-info/METADATA +53 -0
- medicafe-0.240517.0.dist-info/RECORD +39 -0
- {medicafe-0.240415.1.dist-info → medicafe-0.240517.0.dist-info}/WHEEL +1 -1
- medicafe-0.240517.0.dist-info/top_level.txt +2 -0
- medicafe-0.240415.1.dist-info/METADATA +0 -17
- medicafe-0.240415.1.dist-info/RECORD +0 -5
- medicafe-0.240415.1.dist-info/top_level.txt +0 -1
- {medicafe-0.240415.1.dist-info → medicafe-0.240517.0.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
import re #for addresses
|
|
4
|
+
|
|
5
|
+
# Add parent directory of the project to the Python path
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
9
|
+
sys.path.append(project_dir)
|
|
10
|
+
|
|
11
|
+
# Load configuration and crosswalk. When imported as part of the installed
# package the MediLink package import works; when run stand-alone from the
# same directory, fall back to a flat module import.
try:
    from MediLink import MediLink_ConfigLoader
    config, crosswalk = MediLink_ConfigLoader.load_configuration()
except ImportError:
    # NOTE(review): this branch binds only load_configuration, but
    # format_street below calls MediLink_ConfigLoader.log -- in the
    # fallback case that name is undefined; confirm intended behavior.
    from MediLink_ConfigLoader import load_configuration
    config, crosswalk = load_configuration()

from MediBot_Preprocessor_lib import open_csv_for_editing, initialize
from MediBot_UI import manage_script_pause, app_control


"""
- [X] (TEST) Address Formatting 30-character Limit:
(LOW) Address the issue where the format_street function in Medibot may produce addresses exceeding
the 30-character limit. Current stop-gap is removing period characters and the abbreviation "APT"
surrounded by spaces from all records as a temporary solution.
If the address still exceeds 30 characters, the function will attempt to remove spaces from right to left
until it reaches 30 significant digits or runs out of spaces, then truncate to 30 characters if necessary.
"""

# Bring in all the constants.
# NOTE(review): initialize() presumably injects module-level constants
# (e.g. CSV_FILE_PATH used in format_street) -- confirm in
# MediBot_Preprocessor_lib.
initialize(config)
|
|
33
|
+
|
|
34
|
+
# Format Data
|
|
35
|
+
def format_name(value):
    """Convert a name to Medisoft "Last, First M" order.

    Values already containing a comma are assumed pre-formatted and are
    returned unchanged. For "First M Last" inputs the middle name is
    reduced to its initial. A single-token name (or empty string) is
    returned unchanged; the original code emitted a dangling "X, " for
    that case.
    """
    if ',' in value:
        return value
    hyphenated_name_pattern = r'(?P<First>[\w-]+)\s+(?P<Middle>[\w-]?)\s+(?P<Last>[\w-]+)'
    match = re.match(hyphenated_name_pattern, value)
    if match:
        first_name = match.group('First')
        middle_name = match.group('Middle') or ''
        if len(middle_name) > 1:
            middle_name = middle_name[0]  # keep only the middle initial
        last_name = match.group('Last')
        return '{}, {} {}'.format(last_name, first_name, middle_name).strip()
    parts = value.split()
    if len(parts) < 2:
        # Nothing to reorder; avoid returning "Name, " with no first name.
        return value
    return '{}, {}'.format(parts[-1], ' '.join(parts[:-1]))
|
|
49
|
+
|
|
50
|
+
def format_date(value):
    """Reformat an MM/DD/YYYY date string as MMDDYYYY.

    On a parse failure the error is reported and the input is handed
    back untouched.
    """
    try:
        parsed = datetime.strptime(value, '%m/%d/%Y')
    except ValueError as err:
        print("Date format error:", err)
        return value
    return parsed.strftime('%m%d%Y')
|
|
57
|
+
|
|
58
|
+
def format_phone(value):
    """Strip punctuation from a phone number, keeping digits only.

    Returns the ten digits on success; otherwise reports the problem
    and returns the original value unchanged.
    """
    digits = ''.join(ch for ch in value if ch.isdigit())
    if len(digits) != 10:
        print("Phone number format error: Invalid number of digits")
        return value
    return digits
|
|
64
|
+
|
|
65
|
+
def format_policy(value):
    """Return *value* with everything except letters and digits removed."""
    return ''.join(ch for ch in value if ch.isalnum())
|
|
68
|
+
|
|
69
|
+
def format_gender(value):
    """Return the first character of *value* upper-cased (e.g. 'M'/'F').

    An empty string is returned unchanged; the original `value[0]`
    raised IndexError on empty input.
    """
    return value[:1].upper()
|
|
71
|
+
|
|
72
|
+
def enforce_significant_length(output):
    """Compress a street string to at most 30 significant characters.

    *output* may contain the literal token '{Space}' standing in for a
    space (the AutoHotkey SendInput encoding used by this module). The
    token is expanded to a real space for counting, the string is
    shortened -- first by abbreviating ' APT ' to ' #', then by deleting
    spaces from right to left, and finally by hard truncation -- and any
    remaining spaces are re-encoded as '{Space}' in the return value.
    """
    # Work on the literal-space form so lengths are counted correctly.
    temp_output = output.replace('{Space}', ' ')

    # Check if the number of significant characters exceeds 30.
    if len(temp_output) > 30:
        # First line of defense: abbreviate apartment designators.
        temp_output = temp_output.replace(' APT ', ' #')

        # Remove spaces from right to left until the string fits or
        # there are no spaces left.
        while len(temp_output) > 30:
            last_space_index = temp_output.rfind(' ')
            if last_space_index == -1:
                break
            # Delete exactly the one space character. (Bug fix: the
            # original sliced with last_space_index+7 -- a leftover from
            # operating on the 7-character '{Space}' token -- which also
            # deleted the 6 significant characters after each space.)
            temp_output = temp_output[:last_space_index] + temp_output[last_space_index + 1:]

        # Last resort: hard cap at 30 characters.
        if len(temp_output) > 30:
            temp_output = temp_output[:30]

    # Re-encode spaces for the SendInput command built by the caller.
    return temp_output.replace(' ', '{Space}')
|
|
97
|
+
|
|
98
|
+
def format_street(value, csv_data, reverse_mapping, parsed_address_components):
    """Extract the street portion of an address string for Medisoft entry.

    When City/State/Zip are recognized they are written into
    *parsed_address_components* (mutated in place). The returned street
    is '{Space}'-encoded and limited to 30 significant characters via
    enforce_significant_length. On a parse error the script is paused
    and the CSV is opened so the operator can correct the row by hand.
    """
    # Temporarily disable script pause status
    app_control.set_pause_status(False)

    # Remove period characters (stop-gap to shorten addresses).
    value = value.replace('.', '')

    # Proceed only if there's a comma, indicating a likely full address
    if ',' in value:
        try:
            MediLink_ConfigLoader.log("Attempting to resolve address via regex...")
            # Retrieve common city names from configuration and prepare a regex pattern
            common_cities = config.get('cities', [])
            city_pattern = '|'.join(re.escape(city) for city in common_cities)
            city_regex_pattern = r'(?P<City>{})'.format(city_pattern)
            city_regex = re.compile(city_regex_pattern, re.IGNORECASE)

            # Search for a common city in the address
            city_match = city_regex.search(value)

            if city_match:
                # Extract city name and partition the value around it
                city = city_match.group('City').upper()
                # NOTE(review): partition uses the upper-cased city; with a
                # mixed-case match in `value` the partition may not split --
                # confirm inputs are upper-cased upstream.
                street, _, remainder = value.partition(city)

                # Regex pattern to find state and zip code in the remainder
                address_pattern = r',\s*(?P<State>[A-Z]{2})\s*(?P<Zip>\d{5}(?:-\d{4})?)?'
                match = re.search(address_pattern, remainder)

                if match:
                    # Update parsed address components
                    parsed_address_components['City'] = city
                    parsed_address_components['State'] = match.group('State')
                    parsed_address_components['Zip Code'] = match.group('Zip')
                    # Return formatted street address, enforcing significant length
                    return enforce_significant_length(street.strip())
            else:
                # Fallback regex for parsing addresses without a common city
                address_pattern = r'(?P<Street>[\w\s]+),?\s+(?P<City>[\w\s]+),\s*(?P<State>[A-Z]{2})\s*(?P<Zip>\d{5}(-\d{4})?)'
                match = re.match(address_pattern, value)

                if match:
                    # Update parsed address components
                    parsed_address_components['City'] = match.group('City')
                    parsed_address_components['State'] = match.group('State')
                    parsed_address_components['Zip Code'] = match.group('Zip')
                    # Return formatted street address, enforcing significant length
                    return enforce_significant_length(match.group('Street').strip())

        except Exception as e:
            # Handle exceptions by logging and offering to correct data manually
            print("Address format error: Unable to parse address '{}'. Error: {}".format(value, e))
            app_control.set_pause_status(True)
            # NOTE(review): CSV_FILE_PATH is not defined in this module;
            # presumably injected as a global by initialize(config) -- confirm.
            open_csv_for_editing(CSV_FILE_PATH)
            manage_script_pause(csv_data, e, reverse_mapping)
            # Return original value with spaces formatted, enforcing significant length
            return enforce_significant_length(value.replace(' ', '{Space}'))
    else:
        # If no comma is present, treat the input as a simple street name
        formatted_value = value.replace(' ', '{Space}')
        enforced_format = enforce_significant_length(formatted_value)
        return enforced_format

    # Fallback return in case no address components are matched even though a comma was present
    return enforce_significant_length(value.replace(' ', '{Space}'))
|
|
163
|
+
|
|
164
|
+
def format_zip(value):
    """Truncate a zip code to its 5-digit prefix (accepts int or str)."""
    return str(value)[:5]
|
|
169
|
+
|
|
170
|
+
def format_data(medisoft_field, value, csv_data, reverse_mapping, parsed_address_components):
    """Format *value* for *medisoft_field* and wrap it in an AHK command.

    Commas and spaces in the formatted value are escaped as '{,}' and
    '{Space}' so AutoHotkey's SendInput types them literally. Fields
    without a dedicated formatter are passed through as strings.
    """
    simple_formatters = {
        'Patient Name': format_name,
        'Birth Date': format_date,
        'Phone': format_phone,
        'Phone #2': format_phone,
        'Gender': format_gender,
        'Zip Code': format_zip,
        'Primary Policy Number': format_policy,
        'Secondary Policy Number': format_policy,
        'Primary Group Number': format_policy,
        'Secondary Group Number': format_policy,
    }

    if medisoft_field == 'Street':
        # Street needs the extra context to pause/correct on bad rows.
        formatted_value = format_street(value, csv_data, reverse_mapping, parsed_address_components)
    elif medisoft_field in simple_formatters:
        formatted_value = simple_formatters[medisoft_field](value)
    else:
        formatted_value = str(value)  # Ensure value is always a string

    formatted_value = formatted_value.replace(',', '{,}').replace(' ', '{Space}')
    return 'SendInput, {}{{Enter}}'.format(formatted_value)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Using docx-utils 0.1.3,
|
|
3
|
+
|
|
4
|
+
This script parses a .docx file containing a table of patient information and extracts
|
|
5
|
+
relevant data into a dictionary. Each row in the table corresponds to a new patient,
|
|
6
|
+
and the data from each cell is parsed into specific variables. The resulting dictionary
|
|
7
|
+
uses the 'Patient ID Number' as keys and lists containing 'Diagnosis Code',
|
|
8
|
+
'Left or Right Eye', and 'Femto yes or no' as values.
|
|
9
|
+
|
|
10
|
+
Functions:
|
|
11
|
+
parse_docx(filepath): Reads the .docx file and constructs the patient data dictionary.
|
|
12
|
+
parse_patient_id(text): Extracts the Patient ID Number from the text.
|
|
13
|
+
parse_diagnosis_code(text): Extracts the Diagnosis Code from the text.
|
|
14
|
+
parse_left_or_right_eye(text): Extracts the eye information (Left or Right) from the text.
|
|
15
|
+
parse_femto_yes_or_no(text): Extracts the Femto information (yes or no) from the text.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from docx import Document
|
|
19
|
+
|
|
20
|
+
def parse_docx(filepath):
    """Build {patient_id: [diagnosis_code, eye, femto]} from a .docx table.

    Only the first table in the document is read, and its first row is
    treated as a header and skipped. Requires python-docx.
    """
    # Open the .docx file
    doc = Document(filepath)

    # Initialize the dictionary to store data
    patient_data = {}

    # Assuming the first table contains the required data
    table = doc.tables[0]

    # Iterate over the rows in the table
    for row in table.rows[1:]:  # Skip header row if it exists
        cells = row.cells

        # Extract and parse data from each cell.
        # NOTE(review): rows with fewer than 4 cells raise IndexError here
        # -- confirm the source documents always have 4 columns.
        patient_id = parse_patient_id(cells[0].text.strip())
        diagnosis_code = parse_diagnosis_code(cells[1].text.strip())
        left_or_right_eye = parse_left_or_right_eye(cells[2].text.strip())
        femto_yes_or_no = parse_femto_yes_or_no(cells[3].text.strip())

        # Construct the dictionary entry (a duplicate ID overwrites the
        # earlier row's data).
        patient_data[patient_id] = [diagnosis_code, left_or_right_eye, femto_yes_or_no]

    return patient_data
|
|
44
|
+
|
|
45
|
+
def parse_patient_id(text):
    """Return the Patient ID: the first whitespace-separated token of *text*.

    Returns an empty string for blank input; the original `split()[0]`
    raised IndexError on an empty cell.
    """
    parts = text.split()
    return parts[0] if parts else ''
|
|
49
|
+
|
|
50
|
+
def parse_diagnosis_code(text):
    """Return the diagnosis code from *text*.

    If the text looks like "label: code", the segment after the first
    colon is returned (trimmed); otherwise the text is returned as-is.
    """
    if ':' not in text:
        return text
    return text.split(':')[1].strip()
|
|
54
|
+
|
|
55
|
+
def parse_left_or_right_eye(text):
    """Return 'Left', 'Right', or 'Unknown' for the eye described in *text*.

    Matching is case-insensitive, for consistency with
    parse_femto_yes_or_no (the original matched only exact-case
    'Left'/'Right', so lowercase cells fell through to 'Unknown').
    """
    lowered = text.lower()
    if 'left' in lowered:
        return 'Left'
    elif 'right' in lowered:
        return 'Right'
    else:
        return 'Unknown'
|
|
64
|
+
|
|
65
|
+
def parse_femto_yes_or_no(text):
    """Return 'Yes', 'No', or 'Unknown' for the femto flag in *text*.

    'yes' wins when both substrings appear.
    NOTE(review): substring matching means any word containing "no"
    (e.g. "none") also registers as 'No' -- confirm inputs are plain
    yes/no style answers.
    """
    lowered = text.lower()
    if 'yes' in lowered:
        return 'Yes'
    if 'no' in lowered:
        return 'No'
    return 'Unknown'
|
|
74
|
+
|
|
75
|
+
# Placeholder function call (replace 'path_to_docx' with the actual file path)
# NOTE(review): these statements execute at import time and will raise
# unless the placeholder path is replaced -- consider moving them under
# an `if __name__ == "__main__":` guard.
filepath = 'path_to_docx'
patient_data_dict = parse_docx(filepath)

# Print the resulting dictionary
print(patient_data_dict)
|
MediBot/MediPost.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Takes CSV from MediLink_Down.py and utilizes config and MediBot to post the CSV to Medisoft.
|
|
3
|
+
This script now also processes ERAs and responses (277CA/277A) to update claims status and finalize billing records.
|
|
4
|
+
Handles parsing and cleaning of input CSV files to ensure data accuracy and compliance with Medisoft requirements.
|
|
5
|
+
"""
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
def load_csv(file_path):
    """Read *file_path* as a header-less CSV into a pandas DataFrame."""
    frame = pd.read_csv(file_path, header=None)
    return frame
|
|
7
|
+
|
|
8
|
+
def segment_data(data):
    """Split the raw CSV DataFrame into per-patient record lists.

    A cell whose text contains 'PATIENT INFORMATION' marks the start of
    a new patient. NaN cells are dropped; each record is the flat list
    of the remaining cell values in row order.
    """
    patient_records = []
    current_record = []

    for line in data.itertuples(index=False):
        # Convert the row tuple to a list and drop NaN cells.
        line = [item for item in list(line) if pd.notna(item)]

        if line:  # Make sure there is data in the line
            # Check for the delimiter indicating a new patient record.
            # Guard the membership test: non-string cells (e.g. numeric
            # values) raised TypeError on the 'in' operator before.
            if isinstance(line[0], str) and 'PATIENT INFORMATION' in line[0]:
                if current_record:
                    # A new delimiter means the previous record is
                    # complete: save it and start a fresh one.
                    patient_records.append(current_record)
                    current_record = []
            # Add the line to the current patient record
            current_record.extend(line)

    # Append the final record after the loop ends.
    if current_record:
        patient_records.append(current_record)

    return patient_records
|
|
36
|
+
|
|
37
|
+
# Function to extract key-value pairs from a patient record segment
|
|
38
|
+
def extract_patient_data(patient_record):
    """Pull the demographic fields out of one segmented patient record.

    Each field lives in the record as a single cell shaped
    "<Key>\\n<value>"; the value is whatever follows the first newline.
    Fields that are absent (or have no value after the newline in any
    matching cell) stay None.
    """
    patient_data = {
        "Name": None,
        "Patient ID": None,
        "Address": None,
        "Home Phone": None,
        "DOB": None,
        "Gender": None
    }

    # Helper: return the text after the newline in the first cell
    # containing *key* (cells with no value after the newline are skipped).
    def extract_value_after_key(lines, key):
        for line in lines:
            # Skip non-string cells: `key in line` raised TypeError on
            # them before (the old try only guarded the .split call).
            if not isinstance(line, str):
                continue
            if key in line:
                split_line = line.split('\n')
                if len(split_line) > 1:
                    # Return the value only if it exists after the newline character
                    return split_line[1].strip()
        return None

    # For each key in patient_data, extract its value from the patient_record
    for key in patient_data:
        patient_data[key] = extract_value_after_key(patient_record, key)
    return patient_data
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def parse_insurance_info(patient_record):
    """Extract primary/secondary insurance fields from one patient record.

    The record is a flat list of cell strings. Parsing begins after a
    cell containing 'INSURANCE INFORMATION'; header cells carry their
    value after a newline ("Policy Number\\n12345"). Policy numbers are
    assigned primary-first; group numbers rely on a flag that toggles on
    each 'Group Number' header to proxy first vs. second position.
    Fields never seen stay None.
    """
    insurance_data = {
        "Primary Insurance": None,
        "Primary Policy Number": None,
        "Primary Group Number": None,
        "Secondary Insurance": None,
        "Secondary Policy Number": None,
        "Secondary Group Number": None
    }

    insurance_section_started = False
    secondary_insurance_detected = False
    group_header_detected = False

    # NOTE(review): every element is assumed to be a string; a non-string
    # cell would raise TypeError on the 'in' tests below -- confirm
    # segment_data output is all-string for these records.
    for element in patient_record:
        if 'INSURANCE INFORMATION' in element:
            # Entering (or re-entering) an insurance section resets the
            # secondary-insurance flag.
            insurance_section_started = True
            secondary_insurance_detected = False
            continue

        if insurance_section_started:
            split_element = element.split('\n')
            if 'Primary Insurance' in element:
                insurance_data["Primary Insurance"] = element.split('\n')[1].strip() if len(element.split('\n')) > 1 else None
            elif 'Secondary Insurance' in element and len(split_element) > 1 and split_element[1].strip():
                # Only a non-empty secondary value activates the
                # secondary branch for subsequent policy/group headers.
                insurance_data["Secondary Insurance"] = element.split('\n')[1].strip() if len(element.split('\n')) > 1 else None
                secondary_insurance_detected = True
            elif 'Policy Number' in element:
                split_element = element.split('\n')
                if len(split_element) > 1:
                    # First policy number seen is primary; the next one
                    # (with a secondary insurance present) is secondary.
                    if not insurance_data["Primary Policy Number"]:
                        insurance_data["Primary Policy Number"] = split_element[1].strip()
                    elif secondary_insurance_detected and not insurance_data["Secondary Policy Number"]:
                        insurance_data["Secondary Policy Number"] = split_element[1].strip()
            elif 'Group Number' in element:
                # Toggle between True/False as a proxy for whether this is
                # the first (primary) or second (secondary) group header.
                # NOTE(review): this positional toggle assumes exactly two
                # 'Group Number' headers per record -- confirm.
                group_header_detected = not group_header_detected
                split_element = element.split('\n')
                if len(split_element) > 1:
                    if not insurance_data["Primary Group Number"] and group_header_detected:
                        insurance_data["Primary Group Number"] = split_element[1].strip()
                    elif secondary_insurance_detected and not insurance_data["Secondary Group Number"] and not group_header_detected:
                        insurance_data["Secondary Group Number"] = split_element[1].strip()

    return insurance_data
|
|
113
|
+
|
|
114
|
+
def structure_data(patient_data_list):
    """Build one DataFrame row per patient from segmented records.

    Each record's demographic fields (extract_patient_data) are merged
    with its insurance fields (parse_insurance_info) and the combined
    rows are returned as a DataFrame with a fixed column order.
    """
    # Column order expected downstream.
    column_headers = [
        "Name",
        "Patient ID",
        "Address",
        "Home Phone",
        "DOB",
        "Gender",
        "Primary Insurance",
        "Primary Policy Number",
        "Primary Group Number",
        "Secondary Insurance",
        "Secondary Policy Number",
        "Secondary Group Number",
    ]

    structured_patient_records = []
    for patient_record in patient_data_list:
        # Merge demographics with insurance details for this patient.
        merged = dict(extract_patient_data(patient_record))
        merged.update(parse_insurance_info(patient_record))
        structured_patient_records.append(merged)

    return pd.DataFrame(structured_patient_records, columns=column_headers)
|
|
151
|
+
|
|
152
|
+
def validate_data(data_frame):
    """Run basic QA checks on the structured data and print the results.

    Checks performed: missing-value counts per column, DOB matching
    YYYY-MM-DD, Home Phone matching +D-DDD-DDD-DDDD (null entries pass
    both format checks), plus a describe() summary of DOB for spotting
    anomalies. The DataFrame itself is returned unchanged.
    """
    # Completeness: count missing values per column.
    missing_values_check = data_frame.isnull().sum()

    # Consistency: per-row format checks.
    dob_pattern = r'\d{4}-\d{2}-\d{2}'
    phone_pattern = r'\+\d-\d{3}-\d{3}-\d{4}'
    date_format_check = data_frame['DOB'].apply(
        lambda x: bool(re.match(dob_pattern, x)) if pd.notnull(x) else True)
    phone_format_check = data_frame['Home Phone'].apply(
        lambda x: bool(re.match(phone_pattern, x)) if pd.notnull(x) else True)

    # Anomaly detection: a simple statistical summary of the DOB column.
    dob_anomalies_check = data_frame['DOB'].describe()

    validation_results = {
        "Missing Values Check": missing_values_check,
        "Date Format Consistency": all(date_format_check),
        "Phone Format Consistency": all(phone_format_check),
        "DOB Anomalies Check": dob_anomalies_check,
    }

    print(validation_results)  # Display validation results
    return data_frame
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# Main function to orchestrate the cleaning process
|
|
178
|
+
def clean_patient_data(file_path):
    """Load, segment, structure, and validate the surgery-patient CSV.

    Orchestrates the module pipeline and returns the validated
    DataFrame.
    """
    raw_frame = load_csv(file_path)
    records = segment_data(raw_frame)
    structured = structure_data(records)
    return validate_data(structured)
|
|
192
|
+
|
|
193
|
+
# Path to the CSV file with escaped backslashes.
# NOTE(review): hard-coded per-user paths, and this whole section runs at
# import time -- consider moving it under `if __name__ == "__main__":`.
file_path_sxpatient = 'C:\\Users\\danie\\OneDrive\\Desktop\\CSV02012024.CSV'
# Define the file path for the output CSV file
output_file_path = 'G:\\My Drive\\CocoWave\\XP typing bot\\cleaned_FEB01SXcsv_group.csv'

# Call the main function to clean the patient data
cleaned_patient_data = clean_patient_data(file_path_sxpatient)

# Display the first few rows of the cleaned and validated data to verify the output
print(cleaned_patient_data.head())

# Save the processed data to a CSV file
cleaned_patient_data.to_csv(output_file_path, index=False)

print(f"Processed data saved to {output_file_path}")
|
|
208
|
+
|
|
209
|
+
# Development Roadmap
|
|
210
|
+
|
|
211
|
+
# Do not delete leading zeros from insurance numbers
|
MediBot/__init__.py
ADDED
|
File without changes
|
MediBot/update_json.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
|
|
5
|
+
def update_csv_path(json_file, new_path):
    """Rewrite the CSV_FILE_PATH entry of *json_file* to *new_path*.

    Key order in the JSON document is preserved via OrderedDict.
    Backslashes in *new_path* are doubled before storage (unless the
    path already contains doubled backslashes). Any I/O or JSON error
    is reported and terminates the process with exit code 1.
    """
    try:
        with open(json_file, 'r', encoding='utf-8') as handle:
            try:
                document = json.load(handle, object_pairs_hook=OrderedDict)
            except ValueError as decode_err:
                print("Error decoding JSON file '{}': {}".format(json_file, decode_err))
                sys.exit(1)

        # Ensure correct backslash formatting for JSON.
        has_single = "\\" in new_path
        already_doubled = "\\\\" in new_path
        stored_path = new_path.replace("\\", "\\\\") if has_single and not already_doubled else new_path

        document['CSV_FILE_PATH'] = stored_path

        with open(json_file, 'w', encoding='utf-8') as handle:
            try:
                json.dump(document, handle, ensure_ascii=False, indent=4)
            except ValueError as encode_err:
                print("Error encoding JSON data to file '{}': {}".format(json_file, encode_err))
                sys.exit(1)

    except IOError as io_err:
        print("Error accessing file '{}': {}".format(json_file, io_err))
        sys.exit(1)
    except Exception as e:
        print("An unexpected error occurred: {}".format(e))
        sys.exit(1)
|
|
35
|
+
|
|
36
|
+
if __name__ == "__main__":
    # CLI entry point: update_json.py <path_to_json_file> <new_csv_path>
    if len(sys.argv) == 3:
        json_path = sys.argv[1]
        new_csv_path = sys.argv[2]
        update_csv_path(json_path, new_csv_path)
    else:
        print("Usage: update_json.py <path_to_json_file> <new_csv_path>")
        sys.exit(1)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
import requests
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
def check_internet_connection():
    """Return True if an HTTP probe of google.com succeeds within 5 s.

    Catches all requests failures. The original caught only
    requests.ConnectionError, so a read timeout (requests.Timeout,
    which is not a ConnectionError subclass) propagated and crashed
    the updater instead of reporting "no connection".
    """
    try:
        requests.get("http://www.google.com", timeout=5)
        return True
    except requests.RequestException:
        return False
|
|
13
|
+
|
|
14
|
+
def upgrade_medicafe(package):
    """Upgrade *package* via pip with a tqdm progress bar.

    pip is invoked twice (--no-deps, no cache) as a belt-and-braces
    retry; accumulated pip output is printed at the end. Exits the
    process with code 1 when there is no connectivity or an install
    fails.
    """
    try:
        # Check internet connection
        if not check_internet_connection():
            print("Error: No internet connection detected. Please check your internet connection and try again.")
            sys.exit(1)

        total_progress = 200  # Total progress for two runs

        with tqdm(total=total_progress, desc="Upgrading %s" % package, unit="%") as progress_bar:
            stdout_accumulator = b""
            stderr_accumulator = b""

            for _ in range(2):  # Run pip install twice
                process = subprocess.Popen([sys.executable, '-m', 'pip', 'install', '--upgrade', package, '--no-cache-dir', '--disable-pip-version-check', '--no-deps'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout, stderr = process.communicate()
                stdout_accumulator += stdout
                stderr_accumulator += stderr

                if process.returncode != 0:
                    # If the return code is non-zero, print error details
                    print("Error: Upgrade failed. Details:")
                    print("stdout:", stdout)
                    print("stderr:", stderr)
                    sys.exit(1)

                progress_bar.update(total_progress // 2)  # Update progress bar

                # Add a 3-second sleep between runs
                time.sleep(3)

            # NOTE(review): the two in-loop updates already reach the 200
            # total; this extra update overshoots it -- confirm the
            # intended progress accounting.
            progress_bar.update(total_progress // 2)  # Update progress bar
            print("stdout:", stdout_accumulator.decode("utf-8"))
            print("stderr:", stderr_accumulator.decode("utf-8"))
            time.sleep(1)
    except Exception as e:
        # Log any other exceptions (SystemExit from the sys.exit calls
        # above is not an Exception and passes through untouched).
        print("Error:", e)
        time.sleep(3)
        sys.exit(1)
|
|
54
|
+
|
|
55
|
+
if __name__ == "__main__":
    # Script entry point: upgrade the published medicafe package.
    medicafe_package = "medicafe"
    upgrade_medicafe(medicafe_package)
|