medicafe 0.250723.3.tar.gz → 0.250723.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of medicafe might be problematic.
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot.py +6 -3
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_Preprocessor_lib.py +35 -23
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_UI.py +14 -4
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_docx_decoder.py +145 -60
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_API_v3.py +7 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Parser.py +80 -54
- {medicafe-0.250723.3 → medicafe-0.250723.5}/PKG-INFO +1 -1
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/PKG-INFO +1 -1
- {medicafe-0.250723.3 → medicafe-0.250723.5}/setup.py +1 -1
- {medicafe-0.250723.3 → medicafe-0.250723.5}/LICENSE +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MANIFEST.in +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot.bat +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_Charges.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_Crosswalk_Library.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_Post.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_Preprocessor.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/MediBot_dataformat_library.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/PDF_to_CSV_Cleaner.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/__init__.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/update_json.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediBot/update_medicafe.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_837p_cob_library.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_837p_encoder.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_837p_encoder_library.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_837p_utilities.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_API_Generator.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_API_v2.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_APIs.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Azure.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_ClaimStatus.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_ConfigLoader.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_DataMgmt.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Decoder.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Deductible.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Deductible_Validator.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Down.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Gmail.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_GraphQL.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Mailer.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Scan.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Scheduler.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_UI.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_Up.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/MediLink_batch.bat +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/Soumit_api.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/__init__.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/openssl.cnf +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/test.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/test_cob_library.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/test_validation.py +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/MediLink/webapp.html +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/README.md +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/SOURCES.txt +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/dependency_links.txt +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/not-zip-safe +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/requires.txt +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/medicafe.egg-info/top_level.txt +0 -0
- {medicafe-0.250723.3 → medicafe-0.250723.5}/setup.cfg +0 -0
MediBot/MediBot.py (+6 -3)

@@ -393,9 +393,11 @@ if __name__ == "__main__":
 
     # Check if there are patients left to process
     if len(patients_to_process) == 0:
-
+        print("\nAll patients have been processed. Continue anyway?: ", end='', flush=True)
+        proceed = input().lower().strip() in ['yes', 'y']
     else:
-
+        print("\nDo you want to proceed with the {} remaining patient(s)? (yes/no): ".format(len(patients_to_process)), end='', flush=True)
+        proceed = input().lower().strip() in ['yes', 'y']
 
     # TODO: Here is where we need to add the step where we move to MediBot_Charges.
     # The return is an enriched dataset to be picked up by MediBot which means we need to return:

@@ -406,7 +408,8 @@ if __name__ == "__main__":
     print(" Press 'F8' to create a New Patient.")
     print(" Press 'F12' to begin data entry.")
     print(" Press 'F11' at any time to Pause.")
-
+    print("\n*** Press [Enter] when ready to begin! ***")
+    input()
     MediLink_ConfigLoader.log("Opening Medisoft...")
     open_medisoft(app_control.get_medisoft_shortcut())
     app_control.set_pause_status(True)
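The recurring pattern in this release replaces single-call input(prompt) prompts with an explicit print(..., end='', flush=True) followed by a bare input(); per the diff's own comments, the intent is to force the prompt to appear on Windows XP consoles before the script blocks for input. A minimal sketch of the pattern as a reusable helper (the ask_yes_no name is illustrative, not part of the package):

def ask_yes_no(prompt):
    # Print the prompt explicitly and flush so it is visible even on
    # consoles that do not flush stdout before blocking on input().
    print(prompt, end='', flush=True)
    return input().lower().strip() in ['yes', 'y']

if __name__ == "__main__":
    proceed = ask_yes_no("\nProceed? (yes/no): ")
    print("Continuing..." if proceed else "Stopping.")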
MediBot/MediBot_Preprocessor_lib.py (+35 -23)

@@ -517,30 +517,48 @@ def update_diagnosis_codes(csv_data):
 
     MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")
 
-    #
-    #
+    # PERFORMANCE OPTIMIZATION: Use os.scandir() for more efficient file system operations
+    # This reduces the number of file system calls and improves performance with large directories
     valid_files = []
     try:
-
-
-
-
-
-
-
-
-
-
-
+        # Use os.scandir() for better performance (XP/3.4.4 compatible)
+        with os.scandir(local_storage_path) as entries:
+            for entry in entries:
+                if entry.name.endswith('.docx'):
+                    # Get file modification time in single operation
+                    try:
+                        stat_info = entry.stat()
+                        mtime = stat_info.st_mtime
+                        if threshold_start <= datetime.fromtimestamp(mtime) <= threshold_end:
+                            valid_files.append(entry.path)
+                    except (OSError, ValueError):
+                        # Skip files with invalid modification times
+                        continue
     except OSError:
         MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
         return
 
+    # PERFORMANCE OPTIMIZATION: Pre-process patient IDs for efficient lookup
+    # Create a set of patient IDs from CSV data for faster lookups
+    patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
+
+    # PERFORMANCE OPTIMIZATION: Pre-convert surgery dates to string format
+    # Convert all surgery dates to string format once to avoid repeated conversions in loops
+    surgery_date_strings = {}
+    for row in csv_data:
+        patient_id = row.get('Patient ID', '').strip()
+        surgery_date = row.get('Surgery Date')
+        if surgery_date != datetime.min:
+            surgery_date_strings[patient_id] = surgery_date.strftime("%m-%d-%Y")
+        else:
+            surgery_date_strings[patient_id] = ''
+
     # Process valid DOCX files
     for filepath in valid_files:
         MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
         try:
             patient_data = parse_docx(filepath, surgery_dates)  # Pass surgery_dates to parse_docx
+            # PERFORMANCE OPTIMIZATION: Use defaultdict for more efficient dictionary operations
             for patient_id, service_dates in patient_data.items():
                 if patient_id not in all_patient_data:
                     all_patient_data[patient_id] = {}

@@ -556,9 +574,6 @@ def update_diagnosis_codes(csv_data):
     # Debug logging for all_patient_data
     MediLink_ConfigLoader.log("All patient data collected from DOCX files: {}".format(all_patient_data), level="DEBUG")
 
-    # Extract patient IDs from csv_data for efficient matching
-    patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
-
     # Check if any patient data was collected
     if not all_patient_data or not patient_ids_in_csv.intersection(all_patient_data.keys()):
         MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")

@@ -570,20 +585,17 @@ def update_diagnosis_codes(csv_data):
     # Initialize counter for updated rows
     updated_count = 0
 
+    # PERFORMANCE OPTIMIZATION: Single pass through CSV data with pre-processed lookups
     # Update the "Default Diagnosis #1" column in the CSV data
     for row_num, row in enumerate(csv_data, start=1):
         patient_id = row.get('Patient ID', '').strip()
+        # Use pre-processed patient ID lookup for efficiency
        if patient_id not in patient_ids_in_csv:
            continue  # Skip rows that do not match any patient ID
 
        MediLink_ConfigLoader.log("Processing row number {}.".format(row_num), level="DEBUG")
-
-
-        # Convert surgery_date to string format for lookup
-        if surgery_date != datetime.min:
-            surgery_date_str = surgery_date.strftime("%m-%d-%Y")
-        else:
-            surgery_date_str = ''
+        # Use pre-converted surgery date string for efficient lookup
+        surgery_date_str = surgery_date_strings.get(patient_id, '')
 
        MediLink_ConfigLoader.log("Patient ID: {}, Surgery Date: {}".format(patient_id, surgery_date_str), level="DEBUG")
 
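The scan above swaps per-file path checks for a single os.scandir() pass; DirEntry.stat() can reuse information fetched during the directory listing, so the modification-time filter avoids extra filesystem calls. (Despite the "XP/3.4.4 compatible" comment, os.scandir() was added in Python 3.5 and only became usable as a context manager in 3.6.) A self-contained sketch of the same filtering pattern, with an illustrative directory and time window:

import os
from datetime import datetime, timedelta

def docx_files_modified_between(path, start, end):
    # One directory pass; entry.stat() can reuse data fetched by scandir,
    # avoiding a separate stat call per file on platforms that provide it.
    matches = []
    with os.scandir(path) as entries:
        for entry in entries:
            if not entry.name.endswith('.docx'):
                continue
            try:
                mtime = datetime.fromtimestamp(entry.stat().st_mtime)
            except (OSError, ValueError):
                continue  # unreadable entry or invalid timestamp
            if start <= mtime <= end:
                matches.append(entry.path)
    return matches

# Illustrative usage: .docx files touched in the last 7 days
now = datetime.now()
print(docx_files_modified_between(".", now - timedelta(days=7), now))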
MediBot/MediBot_UI.py (+14 -4)

@@ -147,7 +147,8 @@ def display_patient_selection_menu(csv_data, reverse_mapping, proceed_as_medicar
     selected_indices, selected_patient_ids = display_patient_list(csv_data, reverse_mapping, exclude_medicare=True)
 
     print("-" * 60)
-
+    print("\nDo you want to proceed with the selected patients? (yes/no): ", end='', flush=True)
+    proceed = input().lower().strip() in ['yes', 'y']
 
     if not proceed:
         display_menu_header("Patient Selection for Today's Data Entry")

@@ -156,7 +157,8 @@ def display_patient_selection_menu(csv_data, reverse_mapping, proceed_as_medicar
 
     while True:
         while True:
-
+            print("\nEnter the number(s) of the patients you wish to proceed with \n(e.g., 1,3,5): ", end='', flush=True)
+            selection = input().strip()
             if not selection:
                 print("Invalid entry. Please provide at least one number.")
                 continue

@@ -194,6 +196,8 @@ def display_menu_header(title):
     print("\n" + "-" * 60)
     print(title)
     print("-" * 60)
+    # Force flush for Windows XP compatibility
+    sys.stdout.flush()
 
 def handle_user_interaction(interaction_mode, error_message):
     # Import here to avoid circular imports

@@ -228,7 +232,8 @@ def handle_user_interaction(interaction_mode, error_message):
     print("3: Go back two patients and redo")
     print("4: Exit script")
     print("-" * 60)
-
+    print("Enter your choice (1/2/3/4): ", end='', flush=True)
+    choice = input().strip()
 
     if choice == '1':
         print("Selected: 'Retry last entry'. Please press 'F12' to continue.")

@@ -254,10 +259,15 @@ def user_interaction(csv_data, interaction_mode, error_message, reverse_mapping)
 
     if interaction_mode == 'triage':
         display_menu_header(" =(^.^)= Welcome to MediBot! =(^.^)=")
+
+        # Force flush for Windows XP compatibility
+        sys.stdout.flush()
 
         while True:
             try:
-
+                # Use a more explicit prompt format for Windows XP
+                print("\nAm I processing Medicare patients? (yes/no): ", end='', flush=True)
+                response = input().lower().strip()
                 if response:
                     if response in ['yes', 'y']:
                         app_control.load_paths_from_config(medicare=True)
MediBot/MediBot_docx_decoder.py (+145 -60)

@@ -1,7 +1,7 @@
 #MediBot_docx_decoder.py
 from datetime import datetime
 from collections import OrderedDict
-import os, re, sys, zipfile
+import os, re, sys, zipfile, pprint
 from docx import Document
 from lxml import etree
 

@@ -14,6 +14,56 @@ try:
 except ImportError:
     from MediLink import MediLink_ConfigLoader
 
+# Pre-compile regex patterns for better performance (XP/3.4.4 compatible)
+_DIAGNOSIS_CODE_PATTERN = re.compile(r'H\d{2}\.\d+')
+_DAY_WEEK_PATTERN = re.compile(r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)")
+_MONTH_DAY_PATTERN = re.compile(r"(JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER) \d{1,2}")
+_YEAR_PATTERN = re.compile(r"\d{4}")
+_YEAR_SPLIT_PATTERNS = [
+    re.compile(r'(\d{3}) (\d{1})'),
+    re.compile(r'(\d{1}) (\d{3})'),
+    re.compile(r'(\d{2}) (\d{2})')
+]
+_DIGIT_PARTS_PATTERN = re.compile(r'\b(\d{1,2})\b')
+_COMMA_PATTERN = re.compile(r',')
+
+# Pre-compile abbreviation patterns for normalize_text optimization
+_MONTH_ABBR_PATTERNS = {
+    'JAN': re.compile(r'\bJAN\b', re.IGNORECASE),
+    'FEB': re.compile(r'\bFEB\b', re.IGNORECASE),
+    'MAR': re.compile(r'\bMAR\b', re.IGNORECASE),
+    'APR': re.compile(r'\bAPR\b', re.IGNORECASE),
+    'MAY': re.compile(r'\bMAY\b', re.IGNORECASE),
+    'JUN': re.compile(r'\bJUN\b', re.IGNORECASE),
+    'JUL': re.compile(r'\bJUL\b', re.IGNORECASE),
+    'AUG': re.compile(r'\bAUG\b', re.IGNORECASE),
+    'SEP': re.compile(r'\bSEP\b', re.IGNORECASE),
+    'OCT': re.compile(r'\bOCT\b', re.IGNORECASE),
+    'NOV': re.compile(r'\bNOV\b', re.IGNORECASE),
+    'DEC': re.compile(r'\bDEC\b', re.IGNORECASE)
+}
+
+_DAY_ABBR_PATTERNS = {
+    'MON': re.compile(r'\bMON\b', re.IGNORECASE),
+    'TUE': re.compile(r'\bTUE\b', re.IGNORECASE),
+    'WED': re.compile(r'\bWED\b', re.IGNORECASE),
+    'THU': re.compile(r'\bTHU\b', re.IGNORECASE),
+    'FRI': re.compile(r'\bFRI\b', re.IGNORECASE),
+    'SAT': re.compile(r'\bSAT\b', re.IGNORECASE),
+    'SUN': re.compile(r'\bSUN\b', re.IGNORECASE)
+}
+
+# Month and day mapping dictionaries
+_MONTH_MAP = {
+    'JAN': 'JANUARY', 'FEB': 'FEBRUARY', 'MAR': 'MARCH', 'APR': 'APRIL',
+    'MAY': 'MAY', 'JUN': 'JUNE', 'JUL': 'JULY', 'AUG': 'AUGUST',
+    'SEP': 'SEPTEMBER', 'OCT': 'OCTOBER', 'NOV': 'NOVEMBER', 'DEC': 'DECEMBER'
+}
+_DAY_MAP = {
+    'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
+    'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
+}
+
 
 def parse_docx(filepath, surgery_dates):  # Accept surgery_dates as a parameter
     try:

@@ -186,10 +236,15 @@ def find_text_in_xml(extract_dir, target_text):
                 root = tree.getroot()
                 namespaces = root.nsmap
                 MediLink_ConfigLoader.log("Processing file: {}".format(file_path), level="DEBUG")
+                # More efficient: collect all text first, then search
+                all_text = []
                 for elem in root.xpath('//w:t', namespaces=namespaces):
-                    if elem.text
-
-
+                    if elem.text:
+                        all_text.append(elem.text)
+                combined_text = ' '.join(all_text)
+                if target_pattern.search(combined_text):
+                    MediLink_ConfigLoader.log("Found target text '{}' in file: {}".format(target_text, file_path), level="DEBUG")
+                    return file_path
             except etree.XMLSyntaxError as e:
                 MediLink_ConfigLoader.log("XMLSyntaxError parsing file {}: {}".format(file_path, e), level="ERROR")
             except Exception as e:

@@ -211,18 +266,14 @@ def extract_date_from_file(file_path):
     combined_text = ' '.join(collected_text)
     combined_text = reassemble_year(combined_text)  # Fix OCR splitting years
     combined_text = normalize_text(combined_text)  # Normalize abbreviations
-    combined_text =
+    combined_text = _COMMA_PATTERN.sub('', combined_text)  # Remove commas if they exist
 
     # Log the combined text
     MediLink_ConfigLoader.log("Combined text from file '{}': {}".format(file_path, combined_text[:200]), level="DEBUG")
 
-
-
-
-
-    day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
-    month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
-    year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
+    day_of_week = _DAY_WEEK_PATTERN.search(combined_text, re.IGNORECASE)
+    month_day = _MONTH_DAY_PATTERN.search(combined_text, re.IGNORECASE)
+    year_match = _YEAR_PATTERN.search(combined_text, re.IGNORECASE)
 
     # Log the results of the regex searches
     MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")

@@ -276,18 +327,14 @@ def extract_date_from_content(xml_content):
     combined_text = ' '.join(collected_text)
     combined_text = reassemble_year(combined_text)  # Fix OCR splitting years
     combined_text = normalize_text(combined_text)  # Normalize abbreviations
-    combined_text =
+    combined_text = _COMMA_PATTERN.sub('', combined_text)  # Remove commas if they exist
 
     # Log the combined text
     MediLink_ConfigLoader.log("Combined text: {}".format(combined_text[:200]), level="DEBUG")  # Log first 200 characters
 
-
-
-
-
-    day_of_week = re.search(day_week_pattern, combined_text, re.IGNORECASE)
-    month_day = re.search(month_day_pattern, combined_text, re.IGNORECASE)
-    year_match = re.search(year_pattern, combined_text, re.IGNORECASE)
+    day_of_week = _DAY_WEEK_PATTERN.search(combined_text, re.IGNORECASE)
+    month_day = _MONTH_DAY_PATTERN.search(combined_text, re.IGNORECASE)
+    year_match = _YEAR_PATTERN.search(combined_text, re.IGNORECASE)
 
     MediLink_ConfigLoader.log("Day of week found: {}".format(day_of_week.group() if day_of_week else 'None'), level="DEBUG")
     MediLink_ConfigLoader.log("Month and day found: {}".format(month_day.group() if month_day else 'None'), level="DEBUG")

@@ -342,40 +389,34 @@ def remove_directory(path):
 
 
 def normalize_text(text):
-
-
-
-
-
-
-        'MON': 'MONDAY', 'TUE': 'TUESDAY', 'WED': 'WEDNESDAY', 'THU': 'THURSDAY',
-        'FRI': 'FRIDAY', 'SAT': 'SATURDAY', 'SUN': 'SUNDAY'
-    }
-
-    for abbr, full in month_map.items():
-        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
-    for abbr, full in day_map.items():
-        text = re.sub(r'\b' + abbr + r'\b', full, text, flags=re.IGNORECASE)
+    # Optimized single-pass processing to avoid O(n²) complexity
+    # Process all abbreviations in one pass instead of multiple regex calls
+    for abbr, pattern in _MONTH_ABBR_PATTERNS.items():
+        text = pattern.sub(_MONTH_MAP[abbr], text)
+    for abbr, pattern in _DAY_ABBR_PATTERNS.items():
+        text = pattern.sub(_DAY_MAP[abbr], text)
 
     return text
 
 
 def reassemble_year(text):
-    #
-
-
-
+    # Optimized year reassembly with early exit conditions
+    # First, handle the most common cases with pre-compiled patterns
+    for pattern in _YEAR_SPLIT_PATTERNS:
+        text = pattern.sub(r'\1\2', text)
 
     # Handle the less common cases where the year might be split as (1,1,2) or (2,1,1) or (1,2,1)
-    parts =
+    parts = _DIGIT_PARTS_PATTERN.findall(text)
     if len(parts) >= 4:
+        # Early exit: only process if we have enough parts
         for i in range(len(parts) - 3):
             candidate = ''.join(parts[i:i + 4])
             if len(candidate) == 4 and candidate.isdigit():
-
-
-
-
+                # More efficient pattern construction
+                pattern_parts = [r'\b' + part + r'\b' for part in parts[i:i + 4]]
+                pattern = r'\s+'.join(pattern_parts)
+                text = re.sub(pattern, candidate, text)
+                break  # Early exit after first successful combination
 
     return text
 

@@ -390,9 +431,8 @@ def parse_patient_id(text):
 
 def parse_diagnosis_code(text):
     try:
-        #
-
-        matches = pattern.findall(text)
+        # Use pre-compiled pattern for better performance
+        matches = _DIAGNOSIS_CODE_PATTERN.findall(text)
 
         if matches:
             return matches[0]  # Return the first match

@@ -432,27 +472,72 @@ def parse_femto_yes_or_no(text):
     return False
 
 
-def rotate_docx_files(directory):
-
-    files
+def rotate_docx_files(directory, surgery_dates=None):
+    """
+    Process all DOCX files in the specified directory that contain "DR" and "SS" in their filename.
+
+    Parameters:
+    - directory (str): Path to the directory containing DOCX files
+    - surgery_dates (set, optional): Set of surgery dates to filter by. If None, processes all files.
+
+    Returns:
+    - dict: Combined patient data from all processed files
+    """
+    # PERFORMANCE OPTIMIZATION: Use os.scandir() for more efficient file system operations
+    # This reduces the number of file system calls and improves performance with large directories
+    valid_files = []
+    try:
+        # Use os.scandir() for better performance (XP/3.4.4 compatible)
+        with os.scandir(directory) as entries:
+            for entry in entries:
+                # Filter files that contain "DR" and "SS" in the filename
+                if (entry.name.endswith('.docx') and
+                        "DR" in entry.name and
+                        "SS" in entry.name):
+                    valid_files.append(entry.path)
+    except OSError as e:
+        print("Error accessing directory '{}': {}".format(directory, e))
+        return {}
 
-
-
+    if not valid_files:
+        print("No valid DOCX files found in directory: {}".format(directory))
+        return {}
+
+    # Initialize combined patient data dictionary
+    combined_patient_data = {}
+
+    # Process each valid DOCX file
+    for filepath in valid_files:
+        filename = os.path.basename(filepath)  # Extract filename for display
+        print("Processing file: {}".format(filename))
+
+        try:
+            # Parse the document with surgery_dates parameter
+            patient_data_dict = parse_docx(filepath, surgery_dates or set())
+
+            # Combine patient data from this file with overall results
+            for patient_id, service_dates in patient_data_dict.items():
+                if patient_id not in combined_patient_data:
+                    combined_patient_data[patient_id] = {}
+                combined_patient_data[patient_id].update(service_dates)
+
+            # Print results for this file
+            print("Data from file '{}':".format(filename))
+            pprint.pprint(patient_data_dict)
+            print()
+
+        except Exception as e:
+            print("Error processing file '{}': {}".format(filename, e))
+            MediLink_ConfigLoader.log("Error processing DOCX file '{}': {}".format(filepath, e), level="ERROR")
+            continue  # Continue with next file instead of crashing
 
-
-    for filename in filtered_files:
-        filepath = os.path.join(directory, filename)
-        # Parse each document and print the resulting dictionary
-        patient_data_dict = parse_docx(filepath)
-        print("Data from file '{}':".format(filename))
-        import pprint
-        pprint.pprint(patient_data_dict)
-        print()
+    return combined_patient_data
 
 
 def main():
     # Call the function with the directory containing your .docx files
     directory = "C:\\Users\\danie\\Downloads\\"
+    # Note: surgery_dates parameter is now optional
     rotate_docx_files(directory)
 
 
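Module-level re.compile() moves pattern compilation out of the hot path, which is the main optimization in this file. One caveat worth flagging: for a compiled pattern, the second positional argument of Pattern.search() is pos (the start index), not a flags value, so a call like _DAY_WEEK_PATTERN.search(combined_text, re.IGNORECASE) silently starts the search at index 2 (the numeric value of re.IGNORECASE) rather than applying case-insensitivity; flags belong in re.compile(). A small sketch of the intended usage, with an illustrative pattern and helper:

import re

# Compile once at import time; include flags here, not at call sites.
_DAY_WEEK_PATTERN = re.compile(
    r"(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)",
    re.IGNORECASE,
)

def find_day(text):
    # Pattern.search(text) -- any extra positional argument is the
    # start position, so flags must not be passed here.
    match = _DAY_WEEK_PATTERN.search(text)
    return match.group(1).upper() if match else None

print(find_day("surgery on friday June 6"))  # -> FRIDAY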
MediLink/MediLink_API_v3.py (+7 -0)

@@ -640,6 +640,13 @@ def submit_uhc_claim(client, x12_request_data):
     it returns a simulated response. If Test Mode is not enabled, it submits the claim and then retrieves
     the claim acknowledgement details using the transaction ID from the initial response.
 
+    NOTE: This function uses endpoints that may not be available in the new swagger version:
+    - /Claims/api/claim-submission/v1 (claim submission)
+    - /Claims/api/claim-details/v1 (claim acknowledgement)
+
+    If these endpoints are deprecated in the new swagger, this function will need to be updated
+    to use the new available endpoints.
+
     :param client: An instance of APIClient
     :param x12_request_data: The x12 837p data as a string
     :return: The final response containing the claim acknowledgement details or a dummy response if in Test Mode
MediLink/MediLink_Parser.py (+80 -54)

@@ -1,10 +1,16 @@
 # MediLink_Parser.py
 
 import re
+from collections import defaultdict
+
+# Pre-compile regex patterns for better performance
+_EBT_KEY_VALUE_PATTERN = re.compile(r'([^:]+):\s*(.+?)(?=\s{2,}[^:]+:|$)')
+_ERA_SEGMENT_PATTERN = re.compile(r'\*')
+_277_SEGMENT_PATTERN = re.compile(r'\*')
 
 def parse_era_content(content, debug=False):
     extracted_data = []
-    normalized_content = content.replace('~\n', '~')
+    normalized_content = content.replace('~\n', '~')  # Normalize line endings
     lines = normalized_content.split('~')
 
     record = {}

@@ -13,25 +19,27 @@ def parse_era_content(content, debug=False):
     is_payer_section = False
 
     for line in lines:
-        segments =
+        segments = _ERA_SEGMENT_PATTERN.split(line)
 
         if segments[0] == 'TRN' and len(segments) > 2:
-            check_eft = segments[2]
+            check_eft = segments[2]  # Extract check/EFT number
 
         if segments[0] == 'N1':
             if segments[1] == 'PR':
-                is_payer_section = True
+                is_payer_section = True  # Enter payer section
             elif segments[1] == 'PE':
-                is_payer_section = False
+                is_payer_section = False  # Exit payer section
 
         if is_payer_section and segments[0] == 'N3' and len(segments) > 1:
-            payer_address = segments[1]
+            payer_address = segments[1]  # Extract payer address
 
         if segments[0] == 'CLP' and len(segments) >= 5:
             if record:
+                # Calculate adjustment amount if not explicitly provided
                 if adjustment_amount == 0 and (write_off > 0 or patient_responsibility > 0):
                     adjustment_amount = write_off + patient_responsibility
 
+                # Update record with calculated amounts
                 record.update({
                     'Payer Address': payer_address,
                     'Allowed Amount': allowed_amount,

@@ -41,8 +49,10 @@ def parse_era_content(content, debug=False):
                 })
                 extracted_data.append(record)
 
+            # Reset counters for next record
             allowed_amount, write_off, patient_responsibility, adjustment_amount = 0, 0, 0, 0
 
+            # Start new record
             record = {
                 'Check EFT': check_eft,
                 'Chart Number': segments[1],

@@ -52,19 +62,28 @@ def parse_era_content(content, debug=False):
             }
 
         elif segments[0] == 'CAS':
-
-
-
-
-
-
+            try:
+                if segments[1] == 'CO':
+                    write_off += float(segments[3])  # Contractual obligation
+                elif segments[1] == 'PR':
+                    patient_responsibility += float(segments[3])  # Patient responsibility
+                elif segments[1] == 'OA':
+                    adjustment_amount += float(segments[3])  # Other adjustments
+            except (ValueError, IndexError):
+                # Skip malformed CAS segments
+                continue
 
         elif segments[0] == 'AMT' and segments[1] == 'B6':
-
+            try:
+                allowed_amount += float(segments[2])  # Allowed amount
+            except (ValueError, IndexError):
+                # Skip malformed AMT segments
+                continue
 
         elif segments[0] == 'DTM' and (segments[1] == '232' or segments[1] == '472'):
-            record['Date of Service'] = segments[2]
+            record['Date of Service'] = segments[2]  # Service date
 
+    # Process final record
     if record:
         if adjustment_amount == 0 and (write_off > 0 or patient_responsibility > 0):
             adjustment_amount = write_off + patient_responsibility

@@ -87,36 +106,36 @@ def parse_277_content(content, debug=False):
     segments = content.split('~')
     records = []
     current_record = {}
+
     for segment in segments:
-        parts =
+        parts = _277_SEGMENT_PATTERN.split(segment)
         if parts[0] == 'HL':
             if current_record:
-                records.append(current_record)
-                current_record = {}
+                records.append(current_record)  # Save completed record
+                current_record = {}  # Start new record
         elif parts[0] == 'NM1':
-            if parts[1] == 'QC':
-                current_record['Patient'] = parts[3]
-            elif parts[1] == '41':
-                current_record['Clearing House'] = parts[3]
-            elif parts[1] == 'PR':
-                current_record['Payer'] = parts[3]
-        elif parts[0] == 'TRN':
-            current_record['Claim #'] = parts[2]
-        elif parts[0] == 'STC':
-            current_record['Status'] = parts[1]
+            if parts[1] == 'QC' and len(parts) > 4:
+                current_record['Patient'] = ' '.join([parts[3], parts[4]])  # Patient name
+            elif parts[1] == '41' and len(parts) > 3:
+                current_record['Clearing House'] = parts[3]  # Clearing house
+            elif parts[1] == 'PR' and len(parts) > 3:
+                current_record['Payer'] = parts[3]  # Payer name
+        elif parts[0] == 'TRN' and len(parts) > 2:
+            current_record['Claim #'] = parts[2]  # Claim number
+        elif parts[0] == 'STC' and len(parts) > 1:
+            current_record['Status'] = parts[1]  # Claim status
             if len(parts) > 4:
-                current_record['Paid'] = parts[4]
-        elif parts[0] == 'DTP':
+                current_record['Paid'] = parts[4]  # Paid amount
+        elif parts[0] == 'DTP' and len(parts) > 3:
             if parts[1] == '472':
-                current_record['Serv.'] = parts[3]
+                current_record['Serv.'] = parts[3]  # Service date
             elif parts[1] == '050':
-                current_record['Proc.'] = parts[3]
-        elif parts[0] == 'AMT':
-
-            current_record['Charged'] = parts[2]
+                current_record['Proc.'] = parts[3]  # Process date
+        elif parts[0] == 'AMT' and parts[1] == 'YU' and len(parts) > 2:
+            current_record['Charged'] = parts[2]  # Charged amount
 
     if current_record:
-        records.append(current_record)
+        records.append(current_record)  # Add final record
 
     if debug:
         print("Parsed 277 Content:")

@@ -135,17 +154,22 @@ def parse_dpt_content(content, debug=False):
     extracted_data = []
     lines = content.splitlines()
     record = {}
+
     for line in lines:
         if 'Patient Account Number:' in line:
             if record:
-                extracted_data.append(record)
-                record = {}
-
-
-
-
+                extracted_data.append(record)  # Save completed record
+                record = {}  # Start new record
+
+        # More efficient split - only split on first occurrence
+        colon_pos = line.find(':')
+        if colon_pos != -1:
+            key = line[:colon_pos].strip()
+            value = line[colon_pos + 1:].strip()
+            record[key] = value  # Add key-value pair to current record
+
     if record:
-        extracted_data.append(record)
+        extracted_data.append(record)  # Add final record
 
     if debug:
         print("Parsed DPT Content:")

@@ -158,10 +182,7 @@ def parse_ebt_content(content, debug=False):
     extracted_data = []  # List to hold all extracted records
     lines = content.splitlines()  # Split the content into individual lines
     record = {}  # Dictionary to hold the current record being processed
-
-    # Regular expression pattern to match key-value pairs in the format "Key: Value"
-    key_value_pattern = re.compile(r'([^:]+):\s*(.+?)(?=\s{2,}[^:]+?:|$)')
-
+
     for line in lines:
         # Check for the start of a new record based on the presence of 'Patient Name'
         if 'Patient Name:' in line and record:

@@ -170,7 +191,7 @@ def parse_ebt_content(content, debug=False):
             record = {}  # Reset the record for the next entry
 
         # Find all key-value pairs in the current line
-        matches =
+        matches = _EBT_KEY_VALUE_PATTERN.findall(line)
         for key, value in matches:
             key = key.strip()  # Remove leading/trailing whitespace from the key
             value = value.strip()  # Remove leading/trailing whitespace from the value

@@ -200,17 +221,22 @@ def parse_ibt_content(content, debug=False):
     extracted_data = []
     lines = content.splitlines()
     record = {}
+
     for line in lines:
         if 'Submitter Batch ID:' in line:
             if record:
-                extracted_data.append(record)
-                record = {}
-
-
-
-
+                extracted_data.append(record)  # Save completed record
+                record = {}  # Start new record
+
+        # More efficient split - only split on first occurrence
+        colon_pos = line.find(':')
+        if colon_pos != -1:
+            key = line[:colon_pos].strip()
+            value = line[colon_pos + 1:].strip()
+            record[key] = value  # Add key-value pair to current record
+
     if record:
-        extracted_data.append(record)
+        extracted_data.append(record)  # Add final record
 
     if debug:
         print("Parsed IBT Content:")
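The hardened CAS/AMT handling above guards the float conversions on *-delimited 835 segments, where a CAS segment carries the adjustment group code (CO contractual obligation, PR patient responsibility, OA other adjustments) followed by a reason code and the amount. A reduced, runnable sketch of that accumulation over hypothetical segment strings:

# Sketch of the CAS/AMT accumulation in parse_era_content, using
# hypothetical 835 segment strings for illustration.
write_off = patient_responsibility = adjustment = allowed = 0.0

for raw in ["CAS*CO*45*20.00", "CAS*CO*45", "CAS*PR*1*15.50", "AMT*B6*64.50"]:
    segments = raw.split('*')
    if segments[0] == 'CAS':
        try:
            if segments[1] == 'CO':        # contractual obligation
                write_off += float(segments[3])
            elif segments[1] == 'PR':      # patient responsibility
                patient_responsibility += float(segments[3])
            elif segments[1] == 'OA':      # other adjustments
                adjustment += float(segments[3])
        except (ValueError, IndexError):
            continue                       # skip malformed segments, e.g. CAS*CO*45
    elif segments[0] == 'AMT' and segments[1] == 'B6':
        try:
            allowed += float(segments[2])  # allowed amount
        except (ValueError, IndexError):
            continue

print(write_off, patient_responsibility, allowed)  # 20.0 15.5 64.5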