medicafe 0.250728.9__py3-none-any.whl → 0.250805.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of medicafe might be problematic. Click here for more details.

Files changed (57) hide show
  1. MediBot/MediBot.bat +233 -19
  2. MediBot/MediBot.py +138 -46
  3. MediBot/MediBot_Crosswalk_Library.py +127 -623
  4. MediBot/MediBot_Crosswalk_Utils.py +618 -0
  5. MediBot/MediBot_Preprocessor.py +72 -17
  6. MediBot/MediBot_Preprocessor_lib.py +470 -76
  7. MediBot/MediBot_UI.py +32 -17
  8. MediBot/MediBot_dataformat_library.py +68 -20
  9. MediBot/MediBot_docx_decoder.py +120 -19
  10. MediBot/MediBot_smart_import.py +180 -0
  11. MediBot/__init__.py +89 -0
  12. MediBot/get_medicafe_version.py +25 -0
  13. MediBot/update_json.py +35 -6
  14. MediBot/update_medicafe.py +19 -1
  15. MediCafe/MediLink_ConfigLoader.py +160 -0
  16. MediCafe/__init__.py +171 -0
  17. MediCafe/__main__.py +222 -0
  18. MediCafe/api_core.py +1098 -0
  19. MediCafe/api_core_backup.py +427 -0
  20. MediCafe/api_factory.py +306 -0
  21. MediCafe/api_utils.py +356 -0
  22. MediCafe/core_utils.py +450 -0
  23. MediCafe/graphql_utils.py +445 -0
  24. MediCafe/logging_config.py +123 -0
  25. MediCafe/logging_demo.py +61 -0
  26. MediCafe/migration_helpers.py +463 -0
  27. MediCafe/smart_import.py +436 -0
  28. MediLink/MediLink_837p_cob_library.py +28 -28
  29. MediLink/MediLink_837p_encoder.py +33 -34
  30. MediLink/MediLink_837p_encoder_library.py +226 -150
  31. MediLink/MediLink_837p_utilities.py +129 -5
  32. MediLink/MediLink_API_Generator.py +83 -60
  33. MediLink/MediLink_API_v3.py +1 -1
  34. MediLink/MediLink_ClaimStatus.py +177 -31
  35. MediLink/MediLink_DataMgmt.py +378 -63
  36. MediLink/MediLink_Decoder.py +20 -1
  37. MediLink/MediLink_Deductible.py +155 -28
  38. MediLink/MediLink_Display_Utils.py +72 -0
  39. MediLink/MediLink_Down.py +127 -5
  40. MediLink/MediLink_Gmail.py +712 -653
  41. MediLink/MediLink_PatientProcessor.py +257 -0
  42. MediLink/MediLink_UI.py +85 -71
  43. MediLink/MediLink_Up.py +28 -4
  44. MediLink/MediLink_insurance_utils.py +227 -230
  45. MediLink/MediLink_main.py +248 -0
  46. MediLink/MediLink_smart_import.py +264 -0
  47. MediLink/__init__.py +93 -1
  48. MediLink/insurance_type_integration_test.py +13 -3
  49. MediLink/test.py +1 -1
  50. MediLink/test_timing.py +59 -0
  51. {medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/METADATA +1 -1
  52. medicafe-0.250805.0.dist-info/RECORD +81 -0
  53. medicafe-0.250805.0.dist-info/entry_points.txt +2 -0
  54. {medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/top_level.txt +1 -0
  55. medicafe-0.250728.9.dist-info/RECORD +0 -59
  56. {medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/LICENSE +0 -0
  57. {medicafe-0.250728.9.dist-info → medicafe-0.250805.0.dist-info}/WHEEL +0 -0
@@ -1,8 +1,44 @@
1
- #MediBot_Preprocessor_lib.py
2
- from collections import OrderedDict, defaultdict
1
+ # MediBot_Preprocessor_lib.py
2
+ """
3
+ Core preprocessing library for MediBot
4
+ Contains core preprocessing functions and utilities.
5
+ """
6
+
7
+ import csv, time, os, sys
3
8
  from datetime import datetime, timedelta
4
- import os, csv, sys, time
5
- import chardet # Ensure chardet is imported
9
+ from collections import OrderedDict
10
+
11
+ # Try to import chardet for encoding detection
12
+ try:
13
+ import chardet
14
+ except ImportError:
15
+ chardet = None # Fallback if chardet is not available
16
+
17
+ # Use core utilities for standardized imports
18
+ from MediCafe.core_utils import (
19
+ import_medibot_module,
20
+ import_medilink_module,
21
+ get_config_loader_with_fallback
22
+ )
23
+
24
+ # Initialize configuration loader with fallback
25
+ MediLink_ConfigLoader = get_config_loader_with_fallback()
26
+
27
+ # Import MediLink_DataMgmt using centralized import function
28
+ MediLink_DataMgmt = import_medilink_module('MediLink_DataMgmt')
29
+
30
+ # Import MediBot modules using centralized import functions
31
+ MediBot_UI = import_medibot_module('MediBot_UI')
32
+ if MediBot_UI:
33
+ app_control = getattr(MediBot_UI, 'app_control', None)
34
+ else:
35
+ app_control = None
36
+
37
+ MediBot_docx_decoder = import_medibot_module('MediBot_docx_decoder')
38
+ if MediBot_docx_decoder:
39
+ parse_docx = getattr(MediBot_docx_decoder, 'parse_docx', None)
40
+ else:
41
+ parse_docx = None
6
42
 
7
43
  # Add the parent directory of the project to the Python path
8
44
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -11,22 +47,25 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
11
47
  _config_cache = None
12
48
  _crosswalk_cache = None
13
49
 
14
- # Attempt to import necessary modules, falling back if they are not found
15
- try:
16
- import MediLink_ConfigLoader
17
- import MediLink_DataMgmt
18
- except ImportError:
19
- from MediLink import MediLink_ConfigLoader, MediLink_DataMgmt
50
+ # Use core utilities for standardized imports
51
+ from MediCafe.core_utils import get_shared_config_loader
52
+ MediLink_ConfigLoader = get_shared_config_loader()
20
53
 
54
+ # Ensure MediLink_ConfigLoader is available
55
+ if MediLink_ConfigLoader is None:
56
+ print("Warning: MediLink_ConfigLoader not available. Some functionality may be limited.")
57
+ # Create a minimal fallback logger
58
+ class FallbackLogger:
59
+ def log(self, message, level="INFO"):
60
+ print("[{}] {}".format(level, message))
61
+ MediLink_ConfigLoader = FallbackLogger()
62
+
63
+ # Import centralized logging configuration
21
64
  try:
22
- from MediBot_UI import app_control
23
- from MediBot_docx_decoder import parse_docx
65
+ from MediCafe.logging_config import PERFORMANCE_LOGGING
24
66
  except ImportError:
25
- from MediBot import MediBot_UI
26
- app_control = MediBot_UI.app_control
27
- from MediBot import MediBot_docx_decoder
28
- parse_docx = MediBot_docx_decoder.parse_docx
29
-
67
+ # Fallback to local flag if centralized config is not available
68
+ PERFORMANCE_LOGGING = False
30
69
  class InitializationError(Exception):
31
70
  def __init__(self, message):
32
71
  self.message = message
@@ -109,9 +148,14 @@ def load_csv_data(csv_file_path):
109
148
  # Detect the file encoding
110
149
  with open(csv_file_path, 'rb') as f:
111
150
  raw_data = f.read()
112
- result = chardet.detect(raw_data)
113
- encoding = result['encoding']
114
- confidence = result['confidence']
151
+ if chardet:
152
+ result = chardet.detect(raw_data)
153
+ encoding = result['encoding']
154
+ confidence = result['confidence']
155
+ else:
156
+ # Fallback to UTF-8 when chardet is not available
157
+ encoding = 'utf-8'
158
+ confidence = 1.0
115
159
  print("Detected encoding: {} (Confidence: {:.2f})".format(encoding, confidence))
116
160
 
117
161
  # Read the CSV file with the detected encoding
@@ -171,6 +215,49 @@ def filter_rows(csv_data):
171
215
  excluded_insurance = {'AETNA', 'AETNA MEDICARE', 'HUMANA MED HMO'}
172
216
  csv_data[:] = [row for row in csv_data if row.get('Patient ID') and row.get('Primary Insurance') not in excluded_insurance]
173
217
 
218
def detect_date_format(date_str):
    """
    PERFORMANCE OPTIMIZATION: Quickly detect the most likely date format
    to avoid trying all formats for every date string.

    The guess is made from the shape of the date portion only: which
    separator is used ('-' or '/') and how wide the first and last
    components are. Overall string length is deliberately NOT used, because
    'MM/DD/YYYY' and 'YYYY/MM/DD' have the same length and non-padded dates
    such as '1/5/2023' are shorter than their padded equivalents.

    Parameters:
    - date_str (str): The date string to analyze. May carry a trailing time
      component (e.g. '12/25/2023 14:30:00'); only the first
      whitespace-delimited token is inspected.

    Returns:
    - str: The most likely strptime format for the date portion, one of
      '%Y-%m-%d', '%Y/%m/%d', '%m-%d-%Y', '%m/%d/%Y', '%m-%d-%y', '%m/%d/%y',
      or None if the input is empty or its shape is not recognized.
    """
    if not date_str:
        return None

    # Remove time components if present. A whitespace-only string yields no
    # tokens at all, so guard before indexing.
    tokens = date_str.split()
    if not tokens:
        return None
    date_only = tokens[0]

    # Dashes are checked before slashes, matching the original precedence.
    for sep in ('-', '/'):
        if date_only.count(sep) != 2:
            continue

        first, _middle, last = date_only.split(sep)

        # A 4-character leading component means the year comes first.
        if len(first) == 4:
            return '%Y{0}%m{0}%d'.format(sep)

        # A 4-character trailing component is a full year (most common case).
        if len(last) == 4:
            return '%m{0}%d{0}%Y'.format(sep)

        # strptime's %y requires exactly two digits.
        if len(last) == 2:
            return '%m{0}%d{0}%y'.format(sep)

        # Three components but no recognizable year width: let the caller
        # fall back to its exhaustive format attempts.
        return None

    return None
260
+
174
261
  def clean_surgery_date_string(date_str):
175
262
  """
176
263
  Cleans and normalizes surgery date strings to handle damaged data.
@@ -193,26 +280,68 @@ def clean_surgery_date_string(date_str):
193
280
  date_str = date_str.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
194
281
  date_str = ' '.join(date_str.split()) # Normalize whitespace
195
282
 
196
- # Handle common date format variations
197
- date_formats = [
198
- '%m/%d/%Y', # 12/25/2023
199
- '%m-%d-%Y', # 12-25-2023
200
- '%m/%d/%y', # 12/25/23
201
- '%m-%d-%y', # 12-25-23
202
- '%Y/%m/%d', # 2023/12/25
203
- '%Y-%m-%d', # 2023-12-25
204
- '%m/%d/%Y %H:%M:%S', # 12/25/2023 14:30:00
205
- '%m-%d-%Y %H:%M:%S', # 12-25-2023 14:30:00
206
- ]
207
-
208
- # Try to parse with different formats
209
- for fmt in date_formats:
283
+ # PERFORMANCE OPTIMIZATION: Try detected format first
284
+ detected_format = detect_date_format(date_str)
285
+ if detected_format:
210
286
  try:
211
- parsed_date = datetime.strptime(date_str, fmt)
212
- # Return in standard MM/DD/YYYY format
287
+ parsed_date = datetime.strptime(date_str, detected_format)
213
288
  return parsed_date.strftime('%m/%d/%Y')
214
289
  except ValueError:
215
- continue
290
+ pass
291
+
292
+ # PERFORMANCE OPTIMIZATION: Try most common format first (MM/DD/YYYY)
293
+ # This reduces the average number of format attempts from 8 to ~1-2
294
+ try:
295
+ parsed_date = datetime.strptime(date_str, '%m/%d/%Y')
296
+ return parsed_date.strftime('%m/%d/%Y')
297
+ except ValueError:
298
+ pass
299
+
300
+ # PERFORMANCE OPTIMIZATION: Try second most common format (MM-DD-YYYY)
301
+ try:
302
+ parsed_date = datetime.strptime(date_str, '%m-%d-%Y')
303
+ return parsed_date.strftime('%m/%d/%Y')
304
+ except ValueError:
305
+ pass
306
+
307
+ # PERFORMANCE OPTIMIZATION: Try 2-digit year formats only if needed
308
+ try:
309
+ parsed_date = datetime.strptime(date_str, '%m/%d/%y')
310
+ return parsed_date.strftime('%m/%d/%Y')
311
+ except ValueError:
312
+ pass
313
+
314
+ try:
315
+ parsed_date = datetime.strptime(date_str, '%m-%d-%y')
316
+ return parsed_date.strftime('%m/%d/%Y')
317
+ except ValueError:
318
+ pass
319
+
320
+ # PERFORMANCE OPTIMIZATION: Try YYYY formats only if needed
321
+ try:
322
+ parsed_date = datetime.strptime(date_str, '%Y/%m/%d')
323
+ return parsed_date.strftime('%m/%d/%Y')
324
+ except ValueError:
325
+ pass
326
+
327
+ try:
328
+ parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
329
+ return parsed_date.strftime('%m/%d/%Y')
330
+ except ValueError:
331
+ pass
332
+
333
+ # PERFORMANCE OPTIMIZATION: Try datetime formats only if needed
334
+ try:
335
+ parsed_date = datetime.strptime(date_str, '%m/%d/%Y %H:%M:%S')
336
+ return parsed_date.strftime('%m/%d/%Y')
337
+ except ValueError:
338
+ pass
339
+
340
+ try:
341
+ parsed_date = datetime.strptime(date_str, '%m-%d-%Y %H:%M:%S')
342
+ return parsed_date.strftime('%m/%d/%Y')
343
+ except ValueError:
344
+ pass
216
345
 
217
346
  # If no format matches, try to extract date components
218
347
  try:
@@ -248,31 +377,89 @@ def convert_surgery_date(csv_data):
248
377
  Parameters:
249
378
  - csv_data (list): List of dictionaries containing CSV row data
250
379
  """
251
- for row in csv_data:
380
+ # TIMING: Start surgery date conversion with granular tracking
381
+ total_start_time = time.time()
382
+ date_cleaning_time = 0
383
+ date_parsing_time = 0
384
+ processed_count = 0
385
+ empty_count = 0
386
+ error_count = 0
387
+
388
+ print("Starting surgery date conversion for {} rows...".format(len(csv_data)))
389
+ # LOGGING STRATEGY: Only log start/end of looped events at INFO level, not individual successes
390
+ # MediLink_ConfigLoader.log("Starting surgery date conversion for {} rows...".format(len(csv_data)), level="INFO") # REMOVED
391
+
392
+ # PERFORMANCE OPTIMIZATION: Pre-compile datetime.strptime for the most common format
393
+ # This avoids repeated format string parsing
394
+ standard_format = '%m/%d/%Y'
395
+
396
+ for row_idx, row in enumerate(csv_data, 1):
252
397
  surgery_date_str = row.get('Surgery Date', '')
253
398
 
254
399
  if not surgery_date_str:
255
- MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
400
+ empty_count += 1
401
+ # LOGGING STRATEGY: Only log actual errors/failures, not routine empty dates
402
+ # if empty_count <= 5: # Only log first 5 empty dates
403
+ # MediLink_ConfigLoader.log("Warning: Surgery Date not found for row: {}".format(row), level="WARNING")
404
+ # print("Surgery Date not found for row: {}".format(row))
256
405
  row['Surgery Date'] = datetime.min # Assign a minimum datetime value if empty
257
- print("Surgery Date not found for row: {}".format(row))
258
406
  else:
407
+ # TIMING: Start date string cleaning
408
+ cleaning_start = time.time()
409
+
259
410
  # Clean the date string first
260
411
  cleaned_date_str = clean_surgery_date_string(surgery_date_str)
261
412
 
413
+ # TIMING: End date string cleaning
414
+ cleaning_end = time.time()
415
+ date_cleaning_time += (cleaning_end - cleaning_start)
416
+
262
417
  if not cleaned_date_str:
263
- MediLink_ConfigLoader.log("Error: Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row), level="ERROR")
418
+ error_count += 1
419
+ # LOGGING STRATEGY: Log actual errors (cleaning failures) at INFO level
420
+ if error_count <= 5: # Only log first 5 errors
421
+ MediLink_ConfigLoader.log("Error: Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row), level="INFO")
422
+ print("Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row))
264
423
  row['Surgery Date'] = datetime.min # Assign a minimum datetime value if cleaning fails
265
- print("Could not clean Surgery Date '{}' for row: {}".format(surgery_date_str, row))
266
424
  else:
425
+ # TIMING: Start date parsing
426
+ parsing_start = time.time()
427
+
267
428
  try:
429
+ # PERFORMANCE OPTIMIZATION: Use pre-compiled format string
268
430
  # Parse the cleaned date string
269
- row['Surgery Date'] = datetime.strptime(cleaned_date_str, '%m/%d/%Y')
270
- MediLink_ConfigLoader.log("Successfully cleaned and parsed Surgery Date '{}' -> '{}' for row: {}".format(
271
- surgery_date_str, cleaned_date_str, row), level="DEBUG")
431
+ row['Surgery Date'] = datetime.strptime(cleaned_date_str, standard_format)
432
+ processed_count += 1
433
+ # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
434
+ # if processed_count <= 10 or processed_count % 100 == 0: # Log first 10 and every 100th
435
+ # MediLink_ConfigLoader.log("Successfully cleaned and parsed Surgery Date '{}' -> '{}' for row: {}".format(
436
+ # surgery_date_str, cleaned_date_str, row), level="DEBUG")
272
437
  except ValueError as e:
273
- MediLink_ConfigLoader.log("Error parsing cleaned Surgery Date '{}': {} for row: {}".format(
274
- cleaned_date_str, e, row), level="ERROR")
438
+ error_count += 1
439
+ # LOGGING STRATEGY: Log actual errors (parsing failures) at INFO level
440
+ if error_count <= 5: # Only log first 5 parsing errors
441
+ MediLink_ConfigLoader.log("Error parsing cleaned Surgery Date '{}': {} for row: {}".format(
442
+ cleaned_date_str, e, row), level="INFO")
275
443
  row['Surgery Date'] = datetime.min # Assign a minimum datetime value if parsing fails
444
+
445
+ # TIMING: End date parsing
446
+ parsing_end = time.time()
447
+ date_parsing_time += (parsing_end - parsing_start)
448
+
449
+ # TIMING: End total surgery date conversion
450
+ total_end_time = time.time()
451
+ total_duration = total_end_time - total_start_time
452
+
453
+ if PERFORMANCE_LOGGING:
454
+ print("Surgery date conversion completed:")
455
+ print(" - Total duration: {:.2f} seconds".format(total_duration))
456
+ print(" - Date cleaning time: {:.2f} seconds ({:.1f}%)".format(date_cleaning_time, (date_cleaning_time/total_duration)*100))
457
+ print(" - Date parsing time: {:.2f} seconds ({:.1f}%)".format(date_parsing_time, (date_parsing_time/total_duration)*100))
458
+ print(" - Processed: {} rows, Empty: {} rows, Errors: {} rows".format(processed_count, empty_count, error_count))
459
+
460
+ # LOGGING STRATEGY: Log completion summary at INFO level (end of looped event)
461
+ MediLink_ConfigLoader.log("Surgery date conversion completed - Total: {:.2f}s, Cleaning: {:.2f}s, Parsing: {:.2f}s, Processed: {}, Empty: {}, Errors: {}".format(
462
+ total_duration, date_cleaning_time, date_parsing_time, processed_count, empty_count, error_count), level="INFO")
276
463
 
277
464
  def sort_and_deduplicate(csv_data):
278
465
  # Create a dictionary to hold unique patients based on Patient ID
@@ -479,13 +666,31 @@ def NEW_update_insurance_ids(csv_data, config, crosswalk):
479
666
  row['Ins1 Insurance ID'] = None
480
667
 
481
668
  def update_insurance_ids(csv_data, config, crosswalk):
482
- MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")
669
+ # LOGGING STRATEGY: Remove DEBUG level function start log - DEBUG is typically silent anyway
670
+ # MediLink_ConfigLoader.log("Starting update_insurance_ids function.", level="DEBUG")
671
+
672
+ # TIMING: Start insurance ID updates with granular tracking
673
+ total_start_time = time.time()
674
+ lookup_build_time = 0
675
+ csv_processing_time = 0
676
+ processed_count = 0
677
+ medicare_count = 0
678
+ regular_count = 0
679
+ placeholder_count = 0
680
+
681
+ print("Starting insurance ID updates for {} rows...".format(len(csv_data)))
682
+ # LOGGING STRATEGY: Only log start/end of looped events at INFO level, not individual successes
683
+ # MediLink_ConfigLoader.log("Starting insurance ID updates for {} rows...".format(len(csv_data)), level="INFO") # REMOVED
684
+
685
+ # TIMING: Start lookup dictionary building
686
+ lookup_start_time = time.time()
483
687
 
484
688
  # PERFORMANCE FIX: Pre-build optimized lookup dictionaries for both regular and Medicare IDs
485
689
  # This reduces Medicare processing overhead by building lookups once instead of repeated processing
486
690
  payer_id_to_medisoft = {}
487
691
  payer_id_to_medicare = {}
488
- MediLink_ConfigLoader.log("Initialized optimized lookup dictionaries for Medicare and regular IDs.", level="DEBUG")
692
+ # LOGGING STRATEGY: Remove DEBUG level initialization log - DEBUG is typically silent anyway
693
+ # MediLink_ConfigLoader.log("Initialized optimized lookup dictionaries for Medicare and regular IDs.", level="DEBUG")
489
694
 
490
695
  # Build both lookup dictionaries simultaneously to avoid multiple iterations
491
696
  for payer_id, details in crosswalk.get('payer_id', {}).items():
@@ -501,14 +706,28 @@ def update_insurance_ids(csv_data, config, crosswalk):
501
706
  payer_id_to_medisoft[payer_id] = int(medisoft_ids[0]) if medisoft_ids else None
502
707
  payer_id_to_medicare[payer_id] = int(medicare_ids[0]) if medicare_ids else None
503
708
 
504
- MediLink_ConfigLoader.log("Processed Payer ID '{}': Regular IDs: {}, Medicare IDs: {}".format(
505
- payer_id, medisoft_ids, medicare_ids), level="DEBUG")
709
+ # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
710
+ # if len(payer_id_to_medisoft) <= 10 or len(payer_id_to_medisoft) % 50 == 0: # Log first 10 and every 50th
711
+ # MediLink_ConfigLoader.log("Processed Payer ID '{}': Regular IDs: {}, Medicare IDs: {}".format(
712
+ # payer_id, medisoft_ids, medicare_ids), level="DEBUG")
713
+
714
+ # TIMING: End lookup dictionary building
715
+ lookup_end_time = time.time()
716
+ lookup_build_time = lookup_end_time - lookup_start_time
717
+
718
+ if PERFORMANCE_LOGGING:
719
+ print("Built lookup dictionaries in {:.2f} seconds for {} payer IDs".format(lookup_build_time, len(payer_id_to_medisoft)))
506
720
 
721
+
722
+ # TIMING: Start CSV processing
723
+ csv_start_time = time.time()
724
+
507
725
  # PERFORMANCE FIX: Single pass through CSV data with optimized Medicare ID resolution
508
726
  for row_idx, row in enumerate(csv_data, 1):
509
727
  ins1_payer_id = row.get('Ins1 Payer ID', '').strip()
510
- # PERFORMANCE FIX: Use enumerate index instead of csv_data.index() which is O(n)
511
- MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_idx, ins1_payer_id), level="DEBUG")
728
+ # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
729
+ # if row_idx <= 10 or row_idx % 100 == 0: # Log first 10 and every 100th
730
+ # MediLink_ConfigLoader.log("Processing row #{} with Ins1 Payer ID '{}'.".format(row_idx, ins1_payer_id), level="DEBUG")
512
731
 
513
732
  # Try Medicare ID first, then fall back to regular ID (optimized Medicare processing)
514
733
  insurance_id = (payer_id_to_medicare.get(ins1_payer_id) or
@@ -523,11 +742,41 @@ def update_insurance_ids(csv_data, config, crosswalk):
523
742
  'medisoft_medicare_id': [], # Placeholder for future Medicare IDs
524
743
  'endpoint': None # Placeholder for future endpoint
525
744
  }
526
- MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")
745
+ placeholder_count += 1
746
+ # LOGGING STRATEGY: Log actual events (new payer IDs) at INFO level
747
+ if placeholder_count <= 5: # Only log first 5 placeholders
748
+ MediLink_ConfigLoader.log("Added placeholder entry for new Payer ID '{}'.".format(ins1_payer_id), level="INFO")
749
+ elif insurance_id == payer_id_to_medicare.get(ins1_payer_id):
750
+ medicare_count += 1
751
+ else:
752
+ regular_count += 1
527
753
 
528
754
  # Assign the resolved insurance ID to the row
529
755
  row['Ins1 Insurance ID'] = insurance_id
530
- MediLink_ConfigLoader.log("Assigned Insurance ID '{}' to row with Ins1 Payer ID '{}'.".format(insurance_id, ins1_payer_id), level="DEBUG")
756
+ processed_count += 1
757
+ # LOGGING STRATEGY: Remove success logging - DEBUG is typically silent anyway
758
+ # if processed_count <= 10 or processed_count % 100 == 0: # Log first 10 and every 100th
759
+ # MediLink_ConfigLoader.log("Assigned Insurance ID '{}' to row with Ins1 Payer ID '{}'.".format(insurance_id, ins1_payer_id), level="DEBUG")
760
+
761
+ # TIMING: End CSV processing
762
+ csv_end_time = time.time()
763
+ csv_processing_time = csv_end_time - csv_start_time
764
+
765
+ # TIMING: End total insurance ID updates
766
+ total_end_time = time.time()
767
+ total_duration = total_end_time - total_start_time
768
+
769
+ if PERFORMANCE_LOGGING:
770
+ print("Insurance ID updates completed:")
771
+ print(" - Total duration: {:.2f} seconds".format(total_duration))
772
+ print(" - Lookup building time: {:.2f} seconds ({:.1f}%)".format(lookup_build_time, (lookup_build_time/total_duration)*100))
773
+ print(" - CSV processing time: {:.2f} seconds ({:.1f}%)".format(csv_processing_time, (csv_processing_time/total_duration)*100))
774
+ print(" - Processed: {} rows, Medicare: {} rows, Regular: {} rows, Placeholders: {} rows".format(
775
+ processed_count, medicare_count, regular_count, placeholder_count))
776
+
777
+ # LOGGING STRATEGY: Log completion summary at INFO level (end of looped event)
778
+ MediLink_ConfigLoader.log("Insurance ID updates completed - Total: {:.2f}s, Lookup: {:.2f}s, Processing: {:.2f}s, Processed: {}, Medicare: {}, Regular: {}, Placeholders: {}".format(
779
+ total_duration, lookup_build_time, csv_processing_time, processed_count, medicare_count, regular_count, placeholder_count), level="INFO")
531
780
 
532
781
  def update_procedure_codes(csv_data, crosswalk):
533
782
 
@@ -591,6 +840,11 @@ def update_procedure_codes(csv_data, crosswalk):
591
840
 
592
841
  def update_diagnosis_codes(csv_data):
593
842
  try:
843
+ # TIMING: Start surgery schedule parsing timing
844
+ parsing_start_time = time.time()
845
+ print("Starting surgery schedule parsing at: {}".format(time.strftime("%H:%M:%S")))
846
+ MediLink_ConfigLoader.log("Starting surgery schedule parsing at: {}".format(time.strftime("%H:%M:%S")), level="INFO")
847
+
594
848
  # Use cached configuration instead of loading repeatedly
595
849
  config, crosswalk = get_cached_configuration()
596
850
 
@@ -613,7 +867,7 @@ def update_diagnosis_codes(csv_data):
613
867
  min_surgery_date = min(surgery_dates)
614
868
  max_surgery_date = max(surgery_dates)
615
869
 
616
- # Apply a ±8-day margin to the surgery dates... Increased from 5 days.
870
+ # Apply a +/-8-day margin to the surgery dates... Increased from 5 days.
617
871
  margin = timedelta(days=8)
618
872
  threshold_start = min_surgery_date - margin
619
873
  threshold_end = max_surgery_date + margin
@@ -625,6 +879,9 @@ def update_diagnosis_codes(csv_data):
625
879
 
626
880
  MediLink_ConfigLoader.log("BAD IDEA: Processing DOCX files modified between {} and {}.".format(threshold_start, threshold_end), level="INFO")
627
881
 
882
+ # TIMING: Start file system operations
883
+ filesystem_start_time = time.time()
884
+
628
885
  # PERFORMANCE OPTIMIZATION: Batch file system operations with caching
629
886
  # Pre-convert threshold timestamps for efficient comparison (Windows XP compatible)
630
887
  threshold_start_ts = threshold_start.timestamp() if hasattr(threshold_start, 'timestamp') else time.mktime(threshold_start.timetuple())
@@ -632,26 +889,33 @@ def update_diagnosis_codes(csv_data):
632
889
 
633
890
  valid_files = []
634
891
  try:
635
- # Use os.scandir() with optimized timestamp comparison (XP/3.4.4 compatible)
636
- with os.scandir(local_storage_path) as entries:
637
- for entry in entries:
638
- if entry.name.endswith('.docx'):
639
- # Get file modification time in single operation
640
- try:
641
- stat_info = entry.stat()
642
- # Direct timestamp comparison avoids datetime conversion overhead
643
- if threshold_start_ts <= stat_info.st_mtime <= threshold_end_ts:
644
- valid_files.append(entry.path)
645
- except (OSError, ValueError):
646
- # Skip files with invalid modification times
647
- continue
892
+ # Use os.listdir() with optimized timestamp comparison (XP/3.4.4 compatible)
893
+ for filename in os.listdir(local_storage_path):
894
+ if filename.endswith('.docx'):
895
+ filepath = os.path.join(local_storage_path, filename)
896
+ # Get file modification time in single operation
897
+ try:
898
+ stat_info = os.stat(filepath)
899
+ # Direct timestamp comparison avoids datetime conversion overhead
900
+ if threshold_start_ts <= stat_info.st_mtime <= threshold_end_ts:
901
+ valid_files.append(filepath)
902
+ except (OSError, ValueError):
903
+ # Skip files with invalid modification times
904
+ continue
648
905
  except OSError:
649
906
  MediLink_ConfigLoader.log("Error accessing directory: {}".format(local_storage_path), level="ERROR")
650
907
  return
651
908
 
909
+ # TIMING: End file system operations
910
+ filesystem_end_time = time.time()
911
+ filesystem_duration = filesystem_end_time - filesystem_start_time
912
+
652
913
  # PERFORMANCE OPTIMIZATION: Log file count for debugging without processing overhead
653
914
  MediLink_ConfigLoader.log("Found {} DOCX files within date threshold".format(len(valid_files)), level="INFO")
654
915
 
916
+ # TIMING: Start CSV data preprocessing
917
+ csv_prep_start_time = time.time()
918
+
655
919
  # PERFORMANCE OPTIMIZATION: Pre-process patient IDs for efficient lookup
656
920
  # Create a set of patient IDs from CSV data for faster lookups
657
921
  patient_ids_in_csv = {row.get('Patient ID', '').strip() for row in csv_data}
@@ -666,20 +930,81 @@ def update_diagnosis_codes(csv_data):
666
930
  surgery_date_strings[patient_id] = surgery_date.strftime("%m-%d-%Y")
667
931
  else:
668
932
  surgery_date_strings[patient_id] = ''
933
+
934
+ # TIMING: End CSV data preprocessing
935
+ csv_prep_end_time = time.time()
936
+ csv_prep_duration = csv_prep_end_time - csv_prep_start_time
937
+
938
+ # TIMING: Log before processing DOCX files
939
+ docx_processing_start_time = time.time()
940
+ print("Found {} DOCX files to process. Starting DOCX parsing...".format(len(valid_files)))
941
+ MediLink_ConfigLoader.log("Found {} DOCX files to process. Starting DOCX parsing...".format(len(valid_files)), level="INFO")
942
+
943
+ # TIMING: Track individual DOCX file processing
944
+ docx_files_processed = 0
945
+ docx_files_skipped = 0
946
+ docx_parse_errors = 0
669
947
 
670
948
  # Process valid DOCX files
671
949
  for filepath in valid_files:
672
- MediLink_ConfigLoader.log("Processing DOCX file: {}".format(filepath), level="INFO")
950
+ # TIMING: Start individual file processing
951
+ file_start_time = time.time()
952
+
673
953
  try:
674
954
  patient_data = parse_docx(filepath, surgery_dates) # Pass surgery_dates to parse_docx
955
+ docx_files_processed += 1
956
+
675
957
  # PERFORMANCE OPTIMIZATION: Use defaultdict for more efficient dictionary operations
676
958
  for patient_id, service_dates in patient_data.items():
677
959
  if patient_id not in all_patient_data:
678
960
  all_patient_data[patient_id] = {}
679
961
  for date_of_service, diagnosis_data in service_dates.items():
962
+ # TODO: SURGERY SCHEDULE CONFLICT RESOLUTION
963
+ # Implement enhanced conflict detection and logging as outlined in
964
+ # surgery_schedule_conflict_resolution_strategy.md
965
+ #
966
+ # Current behavior: Silent overwriting with latest file wins
967
+ # Proposed enhancement:
968
+ # 1. Detect when multiple files contain data for same date
969
+ # 2. Log conflicts with date-organized notifications showing:
970
+ # - Source files (with modification timestamps)
971
+ # - Patients affected (added/removed/modified)
972
+ # - Specific changes (diagnosis, laterality, etc.)
973
+ # 3. Use file modification time to determine priority
974
+ # 4. Generate summary report organized by surgery date
975
+ #
976
+ # Example notification format:
977
+ # "SURGERY SCHEDULE CONFLICTS DETECTED FOR: 12/15/2023"
978
+ # " Original: file1.docx (modified: 08:30:00)"
979
+ # " Revised: file2.docx (modified: 14:45:00)"
980
+ # " Patients affected: 3 modified, 1 added, 1 removed"
981
+ # " Resolution: Using latest file (file2.docx)"
982
+ #
983
+ # This will provide transparency when revised schedules overwrite
984
+ # original schedules, organized by the affected surgery dates.
680
985
  all_patient_data[patient_id][date_of_service] = diagnosis_data
681
986
  except Exception as e:
987
+ docx_parse_errors += 1
682
988
  MediLink_ConfigLoader.log("Error parsing DOCX file {}: {}".format(filepath, e), level="ERROR")
989
+
990
+ # TIMING: End individual file processing
991
+ file_end_time = time.time()
992
+ file_duration = file_end_time - file_start_time
993
+
994
+ # Log slow files (taking more than 1 second)
995
+ if file_duration > 1.0 and PERFORMANCE_LOGGING:
996
+ print(" - Slow file: {} (Duration: {:.2f} seconds)".format(os.path.basename(filepath), file_duration))
997
+
998
+ # TIMING: Log DOCX processing completion
999
+ docx_processing_end_time = time.time()
1000
+ docx_processing_duration = docx_processing_end_time - docx_processing_start_time
1001
+ if PERFORMANCE_LOGGING:
1002
+ print("DOCX parsing completed at: {} (Duration: {:.2f} seconds)".format(
1003
+ time.strftime("%H:%M:%S"), docx_processing_duration))
1004
+ print(" - Files processed: {}, Files skipped: {}, Parse errors: {}".format(
1005
+ docx_files_processed, docx_files_skipped, docx_parse_errors))
1006
+ MediLink_ConfigLoader.log("DOCX parsing completed at: {} (Duration: {:.2f} seconds)".format(
1007
+ time.strftime("%H:%M:%S"), docx_processing_duration), level="INFO")
683
1008
 
684
1009
  # Log if no valid files were found
685
1010
  if not valid_files:
@@ -693,6 +1018,9 @@ def update_diagnosis_codes(csv_data):
693
1018
  MediLink_ConfigLoader.log("No patient data collected or no matching Patient IDs found. Skipping further processing.", level="INFO")
694
1019
  return # Exit the function early if no data is available
695
1020
 
1021
+ # TIMING: Start CSV data matching
1022
+ csv_matching_start_time = time.time()
1023
+
696
1024
  # Get Medisoft shorthand dictionary from crosswalk.
697
1025
  diagnosis_to_medisoft = crosswalk.get('diagnosis_to_medisoft', {})
698
1026
 
@@ -736,9 +1064,27 @@ def update_diagnosis_codes(csv_data):
736
1064
  else:
737
1065
  MediLink_ConfigLoader.log("Patient ID: {} not found in DOCX data for row {}.".format(patient_id, row_num), level="INFO")
738
1066
 
1067
+ # TIMING: End CSV data matching
1068
+ csv_matching_end_time = time.time()
1069
+ csv_matching_duration = csv_matching_end_time - csv_matching_start_time
1070
+
739
1071
  # Log total count of updated rows
740
1072
  MediLink_ConfigLoader.log("Total {} 'Default Diagnosis #1' rows updated.".format(updated_count), level="INFO")
741
1073
 
1074
+ # TIMING: End surgery schedule parsing timing
1075
+ parsing_end_time = time.time()
1076
+ parsing_duration = parsing_end_time - parsing_start_time
1077
+ if PERFORMANCE_LOGGING:
1078
+ print("Surgery schedule parsing completed at: {} (Duration: {:.2f} seconds)".format(
1079
+ time.strftime("%H:%M:%S"), parsing_duration))
1080
+ print(" - File system operations: {:.2f} seconds ({:.1f}%)".format(filesystem_duration, (filesystem_duration/parsing_duration)*100))
1081
+ print(" - CSV data preprocessing: {:.2f} seconds ({:.1f}%)".format(csv_prep_duration, (csv_prep_duration/parsing_duration)*100))
1082
+ print(" - DOCX file processing: {:.2f} seconds ({:.1f}%)".format(docx_processing_duration, (docx_processing_duration/parsing_duration)*100))
1083
+ print(" - CSV data matching: {:.2f} seconds ({:.1f}%)".format(csv_matching_duration, (csv_matching_duration/parsing_duration)*100))
1084
+ print(" - Files processed: {}, Files skipped: {}, Parse errors: {}".format(docx_files_processed, docx_files_skipped, docx_parse_errors))
1085
+ MediLink_ConfigLoader.log("Surgery schedule parsing completed at: {} (Duration: {:.2f} seconds)".format(
1086
+ time.strftime("%H:%M:%S"), parsing_duration), level="INFO")
1087
+
742
1088
  except Exception as e:
743
1089
  message = "An error occurred while updating diagnosis codes. Please check the DOCX files and configuration: {}".format(e)
744
1090
  MediLink_ConfigLoader.log(message, level="ERROR")
@@ -802,11 +1148,59 @@ def load_insurance_data_from_mains(config):
802
1148
  # Initialize the dictionary to hold the insurance to insurance ID mappings
803
1149
  insurance_to_id = {}
804
1150
 
805
- # Read data from MAINS using a provided function to handle fixed-width data
806
- for record, line_number in MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices):
807
- insurance_name = record['MAINSNAME']
808
- # Assuming line_number gives the correct insurance ID without needing adjustment
809
- insurance_to_id[insurance_name] = line_number
1151
+ try:
1152
+ # Check if MAINS file exists before attempting to read
1153
+ if not os.path.exists(mains_path):
1154
+ error_msg = "CRITICAL: MAINS file not found at: {}. This file is required for insurance name to Medisoft ID mapping.".format(mains_path)
1155
+ MediLink_ConfigLoader.log(error_msg, level="CRITICAL")
1156
+ print("\n" + "="*80)
1157
+ print("CRITICAL ERROR: MAINS FILE MISSING")
1158
+ print("="*80)
1159
+ print("\nThe MAINS file is required for the following critical functions:")
1160
+ print("* Mapping insurance company names to Medisoft IDs")
1161
+ print("* Converting insurance names to payer IDs for claim submission")
1162
+ print("* Creating properly formatted 837p claim files")
1163
+ print("\nWithout this file, claim submission will fail because:")
1164
+ print("* Insurance names cannot be converted to payer IDs")
1165
+ print("* 837p claim files cannot be generated")
1166
+ print("* Claims cannot be submitted to insurance companies")
1167
+ print("\nTO FIX THIS:")
1168
+ print("1. Ensure the MAINS file exists at: {}".format(mains_path))
1169
+ print("2. If the file is missing, llamar a Dani")
1170
+ print("3. The file should contain insurance company data from your Medisoft system")
1171
+ print("="*80)
1172
+ return insurance_to_id
1173
+
1174
+ # Read data from MAINS using a provided function to handle fixed-width data
1175
+ for record, line_number in MediLink_DataMgmt.read_general_fixed_width_data(mains_path, mains_slices):
1176
+ insurance_name = record['MAINSNAME']
1177
+ # Assuming line_number gives the correct insurance ID without needing adjustment
1178
+ insurance_to_id[insurance_name] = line_number
1179
+
1180
+ MediLink_ConfigLoader.log("Successfully loaded {} insurance records from MAINS".format(len(insurance_to_id)), level="INFO")
1181
+
1182
+ except FileNotFoundError:
1183
+ error_msg = "CRITICAL: MAINS file not found: {}. This file is required for insurance name to Medisoft ID mapping.".format(mains_path)
1184
+ MediLink_ConfigLoader.log(error_msg, level="CRITICAL")
1185
+ print("\n" + "="*80)
1186
+ print("CRITICAL ERROR: MAINS FILE MISSING")
1187
+ print("="*80)
1188
+ print("\nThe MAINS file is required for the following critical functions:")
1189
+ print("* Mapping insurance company names to Medisoft IDs")
1190
+ print("* Converting insurance names to payer IDs for claim submission")
1191
+ print("* Creating properly formatted 837p claim files")
1192
+ print("\nWithout this file, claim submission will fail because:")
1193
+ print("* Insurance names cannot be converted to payer IDs")
1194
+ print("* 837p claim files cannot be generated")
1195
+ print("* Claims cannot be submitted to insurance companies")
1196
+ print("\nTO FIX THIS:")
1197
+ print("1. Ensure the MAINS file exists at: {}".format(mains_path))
1198
+ print("2. If the file is missing, llamar a Dani")
1199
+ print("3. The file should contain insurance company data from your Medisoft system")
1200
+ print("="*80)
1201
+ except Exception as e:
1202
+ MediLink_ConfigLoader.log("Error loading MAINS data: {}. Continuing without MAINS data.".format(str(e)), level="ERROR")
1203
+ print("Error loading MAINS data: {}. Continuing without MAINS data.".format(str(e)))
810
1204
 
811
1205
  return insurance_to_id
812
1206
 
@@ -852,7 +1246,7 @@ def parse_z_dat(z_dat_path, config): # Why is this in MediBot and not MediLink?
852
1246
  try:
853
1247
  # Reading blocks of fixed-width data (up to 5 lines per record)
854
1248
  for personal_info, insurance_info, service_info, service_info_2, service_info_3 in MediLink_DataMgmt.read_fixed_width_data(z_dat_path):
855
- # Parsing the data using slice definitions from the config
1249
+ # Parse Z.dat reserved record format: 3 active + 2 reserved lines
856
1250
  parsed_data = MediLink_DataMgmt.parse_fixed_width_data(personal_info, insurance_info, service_info, service_info_2, service_info_3, config.get('MediLink_Config', config))
857
1251
 
858
1252
  # Extract Patient ID and Insurance Name from parsed data