kmisc 2.1.123__tar.gz → 2.1.125__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kmisc-2.1.123 → kmisc-2.1.125}/PKG-INFO +1 -1
- {kmisc-2.1.123 → kmisc-2.1.125}/kmisc/__init__.py +95 -36
- {kmisc-2.1.123 → kmisc-2.1.125}/kmisc.egg-info/PKG-INFO +1 -1
- {kmisc-2.1.123 → kmisc-2.1.125}/LICENSE +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/README.md +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/kmisc.egg-info/SOURCES.txt +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/kmisc.egg-info/dependency_links.txt +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/kmisc.egg-info/top_level.txt +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/pyproject.toml +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/setup.cfg +0 -0
- {kmisc-2.1.123 → kmisc-2.1.125}/setup.py +0 -0
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
|
|
3453
3453
|
if default in ['org',{'org'}]: return src
|
3454
3454
|
return default
|
3455
3455
|
|
3456
|
-
def web_capture(url,output_file,image_size='
|
3456
|
+
def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
|
3457
3457
|
#auth_fields.submit.type : name : login button with name
|
3458
3458
|
# : id : login button with id
|
3459
3459
|
# : submit : submit button without name or id
|
@@ -3492,7 +3492,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3492
3492
|
|
3493
3493
|
ocr=None
|
3494
3494
|
if capture_method != 'file':
|
3495
|
-
ocr=OCR(enhance=ocr_enhance)
|
3495
|
+
ocr=OCR(enhance=ocr_enhance,module=ocr_module)
|
3496
3496
|
# Configure Chrome options for headless mode
|
3497
3497
|
from selenium.webdriver.chrome.options import Options
|
3498
3498
|
from selenium.webdriver.common.by import By
|
@@ -3513,6 +3513,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3513
3513
|
# Initialize the Chrome driver
|
3514
3514
|
driver = selenium.webdriver.Chrome(options=chrome_options)
|
3515
3515
|
if image_size.lower() in ['full','fullscreen','full_screen','auto']:
|
3516
|
+
#original_size = driver.get_window_size()
|
3517
|
+
#full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
|
3518
|
+
#full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
|
3519
|
+
#driver.set_window_size(full_width, full_height)
|
3520
|
+
# code save screenshot
|
3521
|
+
#driver.set_window_size(original_size['width'], original_size['height']) #restore size
|
3516
3522
|
driver.maximize_window()
|
3517
3523
|
rc=False,output_file
|
3518
3524
|
try:
|
@@ -3655,10 +3661,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3655
3661
|
else:
|
3656
3662
|
printf(found_strings,log=log,mode='d')
|
3657
3663
|
if find_string:
|
3658
|
-
if find_string
|
3659
|
-
|
3660
|
-
|
3661
|
-
|
3664
|
+
if not isinstance(find_string,list):
|
3665
|
+
find_string=[find_string]
|
3666
|
+
for ff in find_string:
|
3667
|
+
if ff in found_strings:
|
3668
|
+
#Found the exit string, so quit the driver and return True
|
3669
|
+
driver.quit()
|
3670
|
+
return True
|
3662
3671
|
backup_idx+=1
|
3663
3672
|
#capture interval
|
3664
3673
|
time.sleep(wait_time)
|
@@ -3693,7 +3702,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3693
3702
|
#Background running
|
3694
3703
|
if daemon:
|
3695
3704
|
t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
|
3696
|
-
return t
|
3705
|
+
return True,t
|
3697
3706
|
else:
|
3698
3707
|
#Single process running
|
3699
3708
|
rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
|
@@ -3712,42 +3721,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3712
3721
|
# return rc
|
3713
3722
|
|
3714
3723
|
class OCR:
|
3715
|
-
def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
|
3724
|
+
def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
|
3716
3725
|
self.enhance=enhance
|
3717
3726
|
self.image_file=image_file
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3721
|
-
|
3722
|
-
self.
|
3723
|
-
|
3724
|
-
|
3727
|
+
self.ocr_module=opts.get('module') or ocr_module #also accept the 'module' keyword (web_capture passes module=ocr_module), otherwise it is swallowed by **opts
|
3728
|
+
self.language=language
|
3729
|
+
self.gpu=gpu
|
3730
|
+
self.model_storage_directory=model_storage_directory
|
3731
|
+
if self.ocr_module == 'pytesseract':
|
3732
|
+
Import('pytesseract')
|
3733
|
+
Import('import numpy as np')
|
3734
|
+
Import('cv2',install_name='opencv-python')
|
3735
|
+
else:
|
3736
|
+
Import('easyocr')
|
3737
|
+
if self.enhance:
|
3738
|
+
Import('PIL',install_name='Pillow')
|
3739
|
+
Import('numpy')
|
3740
|
+
self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
|
3741
|
+
# Suppress Torch pin_memory warning
|
3742
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
|
3725
3743
|
|
3726
|
-
|
3727
|
-
|
3744
|
+
# Suppress EasyOCR CPU warning
|
3745
|
+
#warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
|
3728
3746
|
|
3729
|
-
|
3730
|
-
|
3747
|
+
# Suppress NetworkX backend warning
|
3748
|
+
warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
|
3731
3749
|
|
3732
|
-
def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
|
3750
|
+
def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
|
3733
3751
|
if not image_file: image_file=self.image_file
|
3734
3752
|
if not image_file: return False
|
3735
|
-
|
3736
|
-
|
3737
|
-
|
3738
|
-
|
3739
|
-
|
3740
|
-
|
3741
|
-
image
|
3742
|
-
|
3743
|
-
|
3744
|
-
|
3745
|
-
|
3746
|
-
|
3747
|
-
#
|
3748
|
-
|
3749
|
-
|
3750
|
-
|
3753
|
+
if not os.path.isfile(image_file): return False
|
3754
|
+
if self.ocr_module == 'pytesseract':
|
3755
|
+
image = cv2.imread(image_file)
|
3756
|
+
# Convert to grayscale
|
3757
|
+
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
3758
|
+
|
3759
|
+
# Invert the image to make text black on white (Tesseract prefers this)
|
3760
|
+
inverted = cv2.bitwise_not(gray)
|
3761
|
+
|
3762
|
+
# Light noise reduction with small Gaussian blur (fast)
|
3763
|
+
blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
|
3764
|
+
|
3765
|
+
# Optional CLAHE for contrast (comment out if too slow; it's generally fast)
|
3766
|
+
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
3767
|
+
enhanced = clahe.apply(blurred)
|
3768
|
+
|
3769
|
+
# Small upscale (1.5x) for better DPI without heavy computation
|
3770
|
+
scale_factor = 1.5
|
3771
|
+
resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) # LINEAR is faster than CUBIC
|
3772
|
+
|
3773
|
+
# Adaptive thresholding for varying console text quality
|
3774
|
+
thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
|
3775
|
+
|
3776
|
+
# Minimal morphological cleanup (small kernel for speed)
|
3777
|
+
kernel = np.ones((2, 2), np.uint8)
|
3778
|
+
cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
3779
|
+
lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
|
3780
|
+
if not lang or lang == 'en': lang='eng'
|
3781
|
+
try:
|
3782
|
+
text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
|
3783
|
+
except:
|
3784
|
+
text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
|
3785
|
+
if output is str:
|
3786
|
+
return text
|
3787
|
+
else:
|
3788
|
+
return text.split()
|
3789
|
+
else:
|
3790
|
+
opts={}
|
3791
|
+
opts['detail']=detail
|
3792
|
+
if isinstance(low_text,float): opts['low_text']=low_text #forwarded to self.reader.readtext() via **opts
|
3793
|
+
if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
|
3794
|
+
if self.enhance:
|
3795
|
+
image = PIL.Image.open(image_file)
|
3796
|
+
image = image.convert('L') #Grayscale
|
3797
|
+
image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
|
3798
|
+
image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
|
3799
|
+
image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
|
3800
|
+
# image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
|
3801
|
+
image.save(image_file)
|
3802
|
+
# image_np = numpy.array(image)
|
3803
|
+
# return self.reader.readtext(image_np,**opts)
|
3804
|
+
# else:
|
3805
|
+
text=self.reader.readtext(image_file,**opts)
|
3806
|
+
if output is str:
|
3807
|
+
return ' '.join(text)
|
3808
|
+
else:
|
3809
|
+
return text
|
3751
3810
|
|
3752
3811
|
############################################
|
3753
3812
|
#Temporary function map for replacement
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|