kmisc 2.1.123__py3-none-any.whl → 2.1.124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kmisc/__init__.py CHANGED
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
3453
3453
  if default in ['org',{'org'}]: return src
3454
3454
  return default
3455
3455
 
3456
- def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3456
+ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3457
3457
  #auth_fields.submit.type : name : login button with name
3458
3458
  # : id : login button with id
3459
3459
  # : submit : submit button without name or id
@@ -3488,11 +3488,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3488
3488
  Import('filecmp')
3489
3489
  Import('shutil')
3490
3490
  if capture_type in ['mov','mp4']:
3491
+ print('????load cv2')
3491
3492
  Import('cv2',install_name='opencv-python')
3492
3493
 
3493
3494
  ocr=None
3494
3495
  if capture_method != 'file':
3495
- ocr=OCR(enhance=ocr_enhance)
3496
+ ocr=OCR(enhance=ocr_enhance,module=ocr_module)
3496
3497
  # Configure Chrome options for headless mode
3497
3498
  from selenium.webdriver.chrome.options import Options
3498
3499
  from selenium.webdriver.common.by import By
@@ -3513,6 +3514,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3513
3514
  # Initialize the Chrome driver
3514
3515
  driver = selenium.webdriver.Chrome(options=chrome_options)
3515
3516
  if image_size.lower() in ['full','fullscreen','full_screen','auto']:
3517
+ #original_size = driver.get_window_size()
3518
+ #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
3519
+ #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
3520
+ #driver.set_window_size(full_width, full_height)
3521
+ # (the screenshot would be saved here, while the window is resized to the full page)
3522
+ #driver.set_window_size(original_size['width'], original_size['height']) #restore size
3516
3523
  driver.maximize_window()
3517
3524
  rc=False,output_file
3518
3525
  try:
@@ -3655,10 +3662,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3655
3662
  else:
3656
3663
  printf(found_strings,log=log,mode='d')
3657
3664
  if find_string:
3658
- if find_string in found_strings:
3659
- #Find exit string, So True, So True, So True, So True
3660
- driver.quit()
3661
- return True
3665
+ if not isinstance(find_string,list):
3666
+ find_string=[find_string]
3667
+ for ff in find_string:
3668
+ if ff in found_strings:
3669
+ #Found an exit string: quit the driver and return True
3670
+ driver.quit()
3671
+ return True
3662
3672
  backup_idx+=1
3663
3673
  #capture interval
3664
3674
  time.sleep(wait_time)
@@ -3712,42 +3722,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3712
3722
  # return rc
3713
3723
 
3714
3724
  class OCR:
3715
- def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
3725
+ def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
3716
3726
  self.enhance=enhance
3717
3727
  self.image_file=image_file
3718
- Import('easyocr')
3719
- if self.enhance:
3720
- Import('PIL',install_name='Pillow')
3721
- Import('numpy')
3722
- self.reader = easyocr.Reader(language,gpu=gpu,model_storage_directory=model_storage_directory)
3723
- # Suppress Torch pin_memory warning
3724
- warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3728
+ self.ocr_module=ocr_module
3729
+ self.language=language
3730
+ self.gpu=gpu
3731
+ self.model_storage_directory=model_storage_directory
3732
+ if self.ocr_module == 'pytesseract':
3733
+ Import('pytesseract')
3734
+ Import('import numpy as np')
3735
+ Import('cv2',install_name='opencv-python')
3736
+ else:
3737
+ Import('easyocr')
3738
+ if self.enhance:
3739
+ Import('PIL',install_name='Pillow')
3740
+ Import('numpy')
3741
+ self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3742
+ # Suppress Torch pin_memory warning
3743
+ warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3725
3744
 
3726
- # Suppress EasyOCR CPU warning
3727
- #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3745
+ # Suppress EasyOCR CPU warning
3746
+ #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3728
3747
 
3729
- # Suppress NetworkX backend warning
3730
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3748
+ # Suppress NetworkX backend warning
3749
+ warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3731
3750
 
3732
- def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
3751
+ def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
3733
3752
  if not image_file: image_file=self.image_file
3734
3753
  if not image_file: return False
3735
- opts={}
3736
- opts['detail']=detail
3737
- if isinstance(low_text,float): opts['low_text']=low_test
3738
- if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3739
- if self.enhance:
3740
- image = PIL.Image.open(image_file)
3741
- image = image.convert('L') #Grayscale
3742
- image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3743
- image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3744
- image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3745
- # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3746
- image.save(image_file)
3747
- # image_np = numpy.array(image)
3748
- # return self.reader.readtext(image_np,**opts)
3749
- # else:
3750
- return self.reader.readtext(image_file,**opts)
3754
+ if not os.path.isfile(image_file): return False
3755
+ if self.ocr_module == 'pytesseract':
3756
+ image = cv2.imread(image_file)
3757
+ # Convert to grayscale
3758
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
3759
+
3760
+ # Invert the image to make text black on white (Tesseract prefers this)
3761
+ inverted = cv2.bitwise_not(gray)
3762
+
3763
+ # Light noise reduction with small Gaussian blur (fast)
3764
+ blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
3765
+
3766
+ # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
3767
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
3768
+ enhanced = clahe.apply(blurred)
3769
+
3770
+ # Small upscale (1.5x) for better DPI without heavy computation
3771
+ scale_factor = 1.5
3772
+ resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) # LINEAR is faster than CUBIC
3773
+
3774
+ # Adaptive thresholding for varying console text quality
3775
+ thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
3776
+
3777
+ # Minimal morphological cleanup (small kernel for speed)
3778
+ kernel = np.ones((2, 2), np.uint8)
3779
+ cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
3780
+ lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
3781
+ if not lang or lang == 'en': lang='eng'
3782
+ try:
3783
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
3784
+ except:
3785
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
3786
+ if output is str:
3787
+ return text
3788
+ else:
3789
+ return text.split()
3790
+ else:
3791
+ opts={}
3792
+ opts['detail']=detail
3793
+ if isinstance(low_text,float): opts['low_text']=low_test
3794
+ if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3795
+ if self.enhance:
3796
+ image = PIL.Image.open(image_file)
3797
+ image = image.convert('L') #Grayscale
3798
+ image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3799
+ image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3800
+ image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3801
+ # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3802
+ image.save(image_file)
3803
+ # image_np = numpy.array(image)
3804
+ # return self.reader.readtext(image_np,**opts)
3805
+ # else:
3806
+ text=self.reader.readtext(image_file,**opts)
3807
+ if output is str:
3808
+ return ' '.join(text)
3809
+ else:
3810
+ return text
3751
3811
 
3752
3812
  ############################################
3753
3813
  #Temporary function map for replacement
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.123
3
+ Version: 2.1.124
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
@@ -0,0 +1,6 @@
1
+ kmisc/__init__.py,sha256=9KNDkVxjqk5_f1h8TRJMwItMXSZFyXBAzwK4MKR-6aM,163758
2
+ kmisc-2.1.124.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
+ kmisc-2.1.124.dist-info/METADATA,sha256=0Jeai7LsZye2IPSLxdVv8EfoU37IpRSJYJb5D4J1io8,5523
4
+ kmisc-2.1.124.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
+ kmisc-2.1.124.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
+ kmisc-2.1.124.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- kmisc/__init__.py,sha256=pu-2QG3sY7W_Vb6eJmpo7Mytvjs4DfaJseGNJN1GmgA,160300
2
- kmisc-2.1.123.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
- kmisc-2.1.123.dist-info/METADATA,sha256=S9rNpdC9R3EUiB9m6p-DZyZcq16Sle34LwxrSzBaYoc,5523
4
- kmisc-2.1.123.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
- kmisc-2.1.123.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
- kmisc-2.1.123.dist-info/RECORD,,