kmisc 2.1.123__py3-none-any.whl → 2.1.125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kmisc/__init__.py CHANGED
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
3453
3453
  if default in ['org',{'org'}]: return src
3454
3454
  return default
3455
3455
 
3456
- def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3456
+ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3457
3457
  #auth_fields.submit.type : name : login button with name
3458
3458
  # : id : login button with id
3459
3459
  # : submit : submit button without name or id
@@ -3492,7 +3492,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3492
3492
 
3493
3493
  ocr=None
3494
3494
  if capture_method != 'file':
3495
- ocr=OCR(enhance=ocr_enhance)
3495
+ ocr=OCR(enhance=ocr_enhance,module=ocr_module)
3496
3496
  # Configure Chrome options for headless mode
3497
3497
  from selenium.webdriver.chrome.options import Options
3498
3498
  from selenium.webdriver.common.by import By
@@ -3513,6 +3513,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3513
3513
  # Initialize the Chrome driver
3514
3514
  driver = selenium.webdriver.Chrome(options=chrome_options)
3515
3515
  if image_size.lower() in ['full','fullscreen','full_screen','auto']:
3516
+ #original_size = driver.get_window_size()
3517
+ #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
3518
+ #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
3519
+ #driver.set_window_size(full_width, full_height)
3520
+ # code save screenshot
3521
+ #driver.set_window_size(original_size['width'], original_size['height']) #restore size
3516
3522
  driver.maximize_window()
3517
3523
  rc=False,output_file
3518
3524
  try:
@@ -3655,10 +3661,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3655
3661
  else:
3656
3662
  printf(found_strings,log=log,mode='d')
3657
3663
  if find_string:
3658
- if find_string in found_strings:
3659
- #Find exit string, So True, So True, So True, So True
3660
- driver.quit()
3661
- return True
3664
+ if not isinstance(find_string,list):
3665
+ find_string=[find_string]
3666
+ for ff in find_string:
3667
+ if ff in found_strings:
3668
+ #Find exit string, So True, So True, So True, So True
3669
+ driver.quit()
3670
+ return True
3662
3671
  backup_idx+=1
3663
3672
  #capture interval
3664
3673
  time.sleep(wait_time)
@@ -3693,7 +3702,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3693
3702
  #Background running
3694
3703
  if daemon:
3695
3704
  t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
3696
- return t
3705
+ return True,t
3697
3706
  else:
3698
3707
  #Single process running
3699
3708
  rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
@@ -3712,42 +3721,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3712
3721
  # return rc
3713
3722
 
3714
3723
  class OCR:
3715
- def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
3724
+ def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
3716
3725
  self.enhance=enhance
3717
3726
  self.image_file=image_file
3718
- Import('easyocr')
3719
- if self.enhance:
3720
- Import('PIL',install_name='Pillow')
3721
- Import('numpy')
3722
- self.reader = easyocr.Reader(language,gpu=gpu,model_storage_directory=model_storage_directory)
3723
- # Suppress Torch pin_memory warning
3724
- warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3727
+ self.ocr_module=ocr_module
3728
+ self.language=language
3729
+ self.gpu=gpu
3730
+ self.model_storage_directory=model_storage_directory
3731
+ if self.ocr_module == 'pytesseract':
3732
+ Import('pytesseract')
3733
+ Import('import numpy as np')
3734
+ Import('cv2',install_name='opencv-python')
3735
+ else:
3736
+ Import('easyocr')
3737
+ if self.enhance:
3738
+ Import('PIL',install_name='Pillow')
3739
+ Import('numpy')
3740
+ self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3741
+ # Suppress Torch pin_memory warning
3742
+ warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3725
3743
 
3726
- # Suppress EasyOCR CPU warning
3727
- #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3744
+ # Suppress EasyOCR CPU warning
3745
+ #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3728
3746
 
3729
- # Suppress NetworkX backend warning
3730
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3747
+ # Suppress NetworkX backend warning
3748
+ warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3731
3749
 
3732
- def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
3750
+ def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
3733
3751
  if not image_file: image_file=self.image_file
3734
3752
  if not image_file: return False
3735
- opts={}
3736
- opts['detail']=detail
3737
- if isinstance(low_text,float): opts['low_text']=low_test
3738
- if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3739
- if self.enhance:
3740
- image = PIL.Image.open(image_file)
3741
- image = image.convert('L') #Grayscale
3742
- image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3743
- image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3744
- image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3745
- # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3746
- image.save(image_file)
3747
- # image_np = numpy.array(image)
3748
- # return self.reader.readtext(image_np,**opts)
3749
- # else:
3750
- return self.reader.readtext(image_file,**opts)
3753
+ if not os.path.isfile(image_file): return False
3754
+ if self.ocr_module == 'pytesseract':
3755
+ image = cv2.imread(image_file)
3756
+ # Convert to grayscale
3757
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
3758
+
3759
+ # Invert the image to make text black on white (Tesseract prefers this)
3760
+ inverted = cv2.bitwise_not(gray)
3761
+
3762
+ # Light noise reduction with small Gaussian blur (fast)
3763
+ blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
3764
+
3765
+ # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
3766
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
3767
+ enhanced = clahe.apply(blurred)
3768
+
3769
+ # Small upscale (1.5x) for better DPI without heavy computation
3770
+ scale_factor = 1.5
3771
+ resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) # LINEAR is faster than CUBIC
3772
+
3773
+ # Adaptive thresholding for varying console text quality
3774
+ thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
3775
+
3776
+ # Minimal morphological cleanup (small kernel for speed)
3777
+ kernel = np.ones((2, 2), np.uint8)
3778
+ cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
3779
+ lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
3780
+ if not lang or lang == 'en': lang='eng'
3781
+ try:
3782
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
3783
+ except:
3784
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
3785
+ if output is str:
3786
+ return text
3787
+ else:
3788
+ return text.split()
3789
+ else:
3790
+ opts={}
3791
+ opts['detail']=detail
3792
+ if isinstance(low_text,float): opts['low_text']=low_test
3793
+ if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3794
+ if self.enhance:
3795
+ image = PIL.Image.open(image_file)
3796
+ image = image.convert('L') #Grayscale
3797
+ image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3798
+ image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3799
+ image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3800
+ # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3801
+ image.save(image_file)
3802
+ # image_np = numpy.array(image)
3803
+ # return self.reader.readtext(image_np,**opts)
3804
+ # else:
3805
+ text=self.reader.readtext(image_file,**opts)
3806
+ if output is str:
3807
+ return ' '.join(text)
3808
+ else:
3809
+ return text
3751
3810
 
3752
3811
  ############################################
3753
3812
  #Temporary function map for replacement
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.123
3
+ Version: 2.1.125
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
@@ -0,0 +1,6 @@
1
+ kmisc/__init__.py,sha256=clNYHP2yuY6LqEG9lobGvdMyIA3EocDSsTJUfZTIs9U,163729
2
+ kmisc-2.1.125.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
+ kmisc-2.1.125.dist-info/METADATA,sha256=7na-3ZkC58XtrxZtn77ONgxR39jKufWqZLdEjo0dZxs,5523
4
+ kmisc-2.1.125.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
+ kmisc-2.1.125.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
+ kmisc-2.1.125.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- kmisc/__init__.py,sha256=pu-2QG3sY7W_Vb6eJmpo7Mytvjs4DfaJseGNJN1GmgA,160300
2
- kmisc-2.1.123.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
- kmisc-2.1.123.dist-info/METADATA,sha256=S9rNpdC9R3EUiB9m6p-DZyZcq16Sle34LwxrSzBaYoc,5523
4
- kmisc-2.1.123.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
- kmisc-2.1.123.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
- kmisc-2.1.123.dist-info/RECORD,,