kmisc 2.1.123__py3-none-any.whl → 2.1.124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kmisc/__init__.py +95 -35
- {kmisc-2.1.123.dist-info → kmisc-2.1.124.dist-info}/METADATA +1 -1
- kmisc-2.1.124.dist-info/RECORD +6 -0
- kmisc-2.1.123.dist-info/RECORD +0 -6
- {kmisc-2.1.123.dist-info → kmisc-2.1.124.dist-info}/LICENSE +0 -0
- {kmisc-2.1.123.dist-info → kmisc-2.1.124.dist-info}/WHEEL +0 -0
- {kmisc-2.1.123.dist-info → kmisc-2.1.124.dist-info}/top_level.txt +0 -0
kmisc/__init__.py
CHANGED
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
|
|
3453
3453
|
if default in ['org',{'org'}]: return src
|
3454
3454
|
return default
|
3455
3455
|
|
3456
|
-
def web_capture(url,output_file,image_size='
|
3456
|
+
def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
|
3457
3457
|
#auth_fields.submit.type : name : login button with name
|
3458
3458
|
# : id : login button with id
|
3459
3459
|
# : submit : submit button without name or id
|
@@ -3488,11 +3488,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3488
3488
|
Import('filecmp')
|
3489
3489
|
Import('shutil')
|
3490
3490
|
if capture_type in ['mov','mp4']:
|
3491
|
+
print('????load cv2')
|
3491
3492
|
Import('cv2',install_name='opencv-python')
|
3492
3493
|
|
3493
3494
|
ocr=None
|
3494
3495
|
if capture_method != 'file':
|
3495
|
-
ocr=OCR(enhance=ocr_enhance)
|
3496
|
+
ocr=OCR(enhance=ocr_enhance,module=ocr_module)
|
3496
3497
|
# Configure Chrome options for headless mode
|
3497
3498
|
from selenium.webdriver.chrome.options import Options
|
3498
3499
|
from selenium.webdriver.common.by import By
|
@@ -3513,6 +3514,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3513
3514
|
# Initialize the Chrome driver
|
3514
3515
|
driver = selenium.webdriver.Chrome(options=chrome_options)
|
3515
3516
|
if image_size.lower() in ['full','fullscreen','full_screen','auto']:
|
3517
|
+
#original_size = driver.get_window_size()
|
3518
|
+
#full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
|
3519
|
+
#full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
|
3520
|
+
#driver.set_window_size(full_width, full_height)
|
3521
|
+
# code save screenshot
|
3522
|
+
#driver.set_window_size(original_size['width'], original_size['height']) #restore size
|
3516
3523
|
driver.maximize_window()
|
3517
3524
|
rc=False,output_file
|
3518
3525
|
try:
|
@@ -3655,10 +3662,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3655
3662
|
else:
|
3656
3663
|
printf(found_strings,log=log,mode='d')
|
3657
3664
|
if find_string:
|
3658
|
-
if find_string
|
3659
|
-
|
3660
|
-
|
3661
|
-
|
3665
|
+
if not isinstance(find_string,list):
|
3666
|
+
find_string=[find_string]
|
3667
|
+
for ff in find_string:
|
3668
|
+
if ff in found_strings:
|
3669
|
+
#Found the exit string, so quit the driver and return True
|
3670
|
+
driver.quit()
|
3671
|
+
return True
|
3662
3672
|
backup_idx+=1
|
3663
3673
|
#capture interval
|
3664
3674
|
time.sleep(wait_time)
|
@@ -3712,42 +3722,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3712
3722
|
# return rc
|
3713
3723
|
|
3714
3724
|
class OCR:
    """Read text out of an image file with EasyOCR (default) or pytesseract.

    The backend is chosen once in ``__init__`` and stored in
    ``self.ocr_module``; ``Text()`` then runs the matching pipeline.
    Third-party modules are loaded through this file's ``Import`` helper.
    """

    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
        """Store the settings and load the selected OCR backend.

        image_file: default image path used by Text() when none is given.
        enhance: when true, the EasyOCR path pre-processes (and overwrites)
            the image with Pillow before reading it.
        language: language code(s); handed to easyocr.Reader as-is, the
            pytesseract path uses the first entry.
        gpu, model_storage_directory: forwarded to easyocr.Reader.
        ocr_module: 'pytesseract' selects pytesseract, anything else EasyOCR.
        opts: extra keywords; ``module`` is honoured as an alias of
            ``ocr_module``, the rest are ignored.
        """
        # web_capture() calls OCR(..., module=ocr_module); without this alias
        # the value would be swallowed by **opts and the backend request lost.
        module_alias=opts.get('module')
        if module_alias:
            ocr_module=module_alias
        self.enhance=enhance
        self.image_file=image_file
        self.ocr_module=ocr_module
        self.language=language
        self.gpu=gpu
        self.model_storage_directory=model_storage_directory
        if self.ocr_module == 'pytesseract':
            Import('pytesseract')
            Import('import numpy as np')
            Import('cv2',install_name='opencv-python')
        else:
            Import('easyocr')
            if self.enhance:
                Import('PIL',install_name='Pillow')
                Import('numpy')
            self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
            # Suppress Torch pin_memory warning
            warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")

            # Suppress EasyOCR CPU warning
            #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")

            # Suppress NetworkX backend warning
            warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")

    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
        """Return the text recognised in an image.

        detail, low_text, contrast_ths: forwarded to ``reader.readtext`` on
            the EasyOCR path (the latter two only when given as float);
            unused on the pytesseract path.
        image_file: image to read; falls back to ``self.image_file``.
        output: ``str`` returns a single string, anything else returns a list
            (whitespace-split words for pytesseract, the raw readtext result
            for EasyOCR).

        Returns False when no image path is available, the path is not a
        file, or (pytesseract path) the image cannot be decoded.
        """
        if not image_file: image_file=self.image_file
        if not image_file: return False
        if not os.path.isfile(image_file): return False
        if self.ocr_module == 'pytesseract':
            image = cv2.imread(image_file)
            # cv2.imread() signals an unreadable/unsupported file with None
            if image is None: return False
            # Convert to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Invert the image to make text black on white (Tesseract prefers this)
            inverted = cv2.bitwise_not(gray)

            # Light noise reduction with small Gaussian blur (fast)
            blurred = cv2.GaussianBlur(inverted, (3, 3), 0)

            # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(blurred)

            # Small upscale (1.5x) for better DPI without heavy computation
            scale_factor = 1.5
            resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)  # LINEAR is faster than CUBIC

            # Adaptive thresholding for varying console text quality
            thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

            # Minimal morphological cleanup (small kernel for speed)
            kernel = np.ones((2, 2), np.uint8)
            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
            lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
            # Tesseract names English 'eng'; the EasyOCR-style default is 'en'
            if not lang or lang == 'en': lang='eng'
            try:
                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
            except Exception:
                # Retry with English when the requested language fails;
                # Exception (not bare except) keeps Ctrl-C/SystemExit working.
                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
            if output is str:
                return text
            return text.split()

        read_opts={'detail':detail}
        if isinstance(low_text,float): read_opts['low_text']=low_text
        if isinstance(contrast_ths,float): read_opts['contrast_ths']=contrast_ths
        if self.enhance:
            # Import the submodules explicitly: a bare "import PIL" does not
            # guarantee that PIL.Image / PIL.ImageEnhance are loaded.
            from PIL import Image, ImageEnhance
            image = Image.open(image_file)
            image = image.convert('L')  #Grayscale
            image = ImageEnhance.Contrast(image).enhance(3.0)  #high contrast
            image = ImageEnhance.Sharpness(image).enhance(2.0)  #Sharpen
            image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0)  # Adjust threshold if needed
            # NOTE: the enhanced picture overwrites the original image file
            image.save(image_file)
        text=self.reader.readtext(image_file,**read_opts)
        if output is str:
            # With detail=0 the items are plain strings; otherwise each item
            # is a sequence whose second element is the recognised text.
            return ' '.join(item if isinstance(item,str) else str(item[1]) for item in text)
        return text
|
3751
3811
|
|
3752
3812
|
############################################
|
3753
3813
|
#Temporary function map for replacement
|
@@ -0,0 +1,6 @@
|
|
1
|
+
kmisc/__init__.py,sha256=9KNDkVxjqk5_f1h8TRJMwItMXSZFyXBAzwK4MKR-6aM,163758
|
2
|
+
kmisc-2.1.124.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
+
kmisc-2.1.124.dist-info/METADATA,sha256=0Jeai7LsZye2IPSLxdVv8EfoU37IpRSJYJb5D4J1io8,5523
|
4
|
+
kmisc-2.1.124.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
+
kmisc-2.1.124.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
+
kmisc-2.1.124.dist-info/RECORD,,
|
kmisc-2.1.123.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
kmisc/__init__.py,sha256=pu-2QG3sY7W_Vb6eJmpo7Mytvjs4DfaJseGNJN1GmgA,160300
|
2
|
-
kmisc-2.1.123.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
-
kmisc-2.1.123.dist-info/METADATA,sha256=S9rNpdC9R3EUiB9m6p-DZyZcq16Sle34LwxrSzBaYoc,5523
|
4
|
-
kmisc-2.1.123.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
-
kmisc-2.1.123.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
-
kmisc-2.1.123.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|