kmisc 2.1.125__tar.gz → 2.1.127__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kmisc-2.1.125 → kmisc-2.1.127}/PKG-INFO +1 -1
- {kmisc-2.1.125 → kmisc-2.1.127}/kmisc/__init__.py +27 -19
- {kmisc-2.1.125 → kmisc-2.1.127}/kmisc.egg-info/PKG-INFO +1 -1
- {kmisc-2.1.125 → kmisc-2.1.127}/LICENSE +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/README.md +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/kmisc.egg-info/SOURCES.txt +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/kmisc.egg-info/dependency_links.txt +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/kmisc.egg-info/top_level.txt +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/pyproject.toml +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/setup.cfg +0 -0
- {kmisc-2.1.125 → kmisc-2.1.127}/setup.py +0 -0
@@ -3462,12 +3462,15 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3462
3462
|
# : id : username/password with name id
|
3463
3463
|
#auth_fields.auth.name : (username,password) : username/password field string for name or id
|
3464
3464
|
#next_do : put data and click submit
|
3465
|
-
|
3466
3465
|
_url=url.split('/')
|
3467
3466
|
url=WEB().url_join(*_url[1:],method=_url[0])
|
3468
3467
|
if isinstance(image_size,str):
|
3469
3468
|
if 'x' in image_size:
|
3470
3469
|
image_size=image_size.split('x')
|
3470
|
+
elif 'X' in image_size:
|
3471
|
+
image_size=image_size.split('X')
|
3472
|
+
elif '*' in image_size:
|
3473
|
+
image_size=image_size.split('*')
|
3471
3474
|
elif ',' in image_size:
|
3472
3475
|
image_size=image_size.split(',')
|
3473
3476
|
elif '*' in image_size:
|
@@ -3477,9 +3480,10 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3477
3480
|
if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
|
3478
3481
|
image_size=','.join([str(i) for i in image_size])
|
3479
3482
|
else:
|
3480
|
-
if not IsIn(image_size,[
|
3481
|
-
|
3482
|
-
|
3483
|
+
if not IsIn(image_size,['full','fullscreen','full_screen','full_size']):
|
3484
|
+
if IsIn(image_size,[None,'auto']):
|
3485
|
+
#Set it to default image size
|
3486
|
+
image_size='1920,1080'
|
3483
3487
|
|
3484
3488
|
if Import('import selenium'):
|
3485
3489
|
return False,'Can not install selenium package'
|
@@ -3502,8 +3506,9 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3502
3506
|
chrome_options.add_argument('--headless') # Run in headless mode
|
3503
3507
|
chrome_options.add_argument('--no-sandbox')
|
3504
3508
|
chrome_options.add_argument('--disable-dev-shm-usage')
|
3509
|
+
chrome_options.add_argument('--disable-extensions')
|
3505
3510
|
|
3506
|
-
if image_size.lower() not in ['full','fullscreen','full_screen','
|
3511
|
+
if image_size.lower() not in ['full','fullscreen','full_screen','full_size']:
|
3507
3512
|
chrome_options.add_argument(f"--window-size={image_size}") # Set window size
|
3508
3513
|
if not gpu:
|
3509
3514
|
chrome_options.add_argument("--disable-gpu")
|
@@ -3512,15 +3517,18 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3512
3517
|
chrome_options.add_argument('--allow-insecure-localhost') # ignore-certificate-errors
|
3513
3518
|
# Initialize the Chrome driver
|
3514
3519
|
driver = selenium.webdriver.Chrome(options=chrome_options)
|
3515
|
-
if image_size.lower() in ['full','fullscreen','full_screen','
|
3520
|
+
if image_size.lower() in ['full','fullscreen','full_screen','full_size']:
|
3516
3521
|
#original_size = driver.get_window_size()
|
3517
3522
|
#full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
|
3518
3523
|
#full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
|
3519
3524
|
#driver.set_window_size(full_width, full_height)
|
3520
|
-
# code save screenshot
|
3521
|
-
#driver.set_window_size(original_size['width'], original_size['height']) #restore size
|
3522
3525
|
driver.maximize_window()
|
3523
3526
|
rc=False,output_file
|
3527
|
+
#Cleanup first
|
3528
|
+
if isinstance(output_file,str) and output_file:
|
3529
|
+
if os.path.isfile(output_file):
|
3530
|
+
os.unlink(output_file)
|
3531
|
+
|
3524
3532
|
try:
|
3525
3533
|
# Navigate to the URL
|
3526
3534
|
driver.get(url)
|
@@ -3734,18 +3742,19 @@ class OCR:
|
|
3734
3742
|
Import('cv2',install_name='opencv-python')
|
3735
3743
|
else:
|
3736
3744
|
Import('easyocr')
|
3745
|
+
Import('logging')
|
3737
3746
|
if self.enhance:
|
3738
3747
|
Import('PIL',install_name='Pillow')
|
3739
|
-
|
3740
|
-
|
3741
|
-
# Suppress Torch pin_memory warning
|
3748
|
+
logging.getLogger('easyocr').setLevel(logging.ERROR)
|
3749
|
+
warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
|
3742
3750
|
warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
|
3743
|
-
|
3744
3751
|
# Suppress EasyOCR CPU warning
|
3745
3752
|
#warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
|
3746
3753
|
|
3747
|
-
#
|
3748
|
-
|
3754
|
+
#self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
|
3755
|
+
self.reader = easyocr.Reader(self.language, gpu=self.gpu, detector='dbnet18',
|
3756
|
+
model_storage_directory=self.model_storage_directory,
|
3757
|
+
download_enabled=False)
|
3749
3758
|
|
3750
3759
|
def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
|
3751
3760
|
if not image_file: image_file=self.image_file
|
@@ -3777,7 +3786,7 @@ class OCR:
|
|
3777
3786
|
kernel = np.ones((2, 2), np.uint8)
|
3778
3787
|
cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
3779
3788
|
lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
|
3780
|
-
if not lang or lang
|
3789
|
+
if not lang or IsIn(lang,['en','eng','english']): lang='eng'
|
3781
3790
|
try:
|
3782
3791
|
text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
|
3783
3792
|
except:
|
@@ -3789,6 +3798,9 @@ class OCR:
|
|
3789
3798
|
else:
|
3790
3799
|
opts={}
|
3791
3800
|
opts['detail']=detail
|
3801
|
+
opts['batch_size']=2
|
3802
|
+
opts['contrast_ths']=0.3
|
3803
|
+
opts['allowlist']='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:/-_@,"+'
|
3792
3804
|
if isinstance(low_text,float): opts['low_text']=low_test
|
3793
3805
|
if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
|
3794
3806
|
if self.enhance:
|
@@ -3797,11 +3809,7 @@ class OCR:
|
|
3797
3809
|
image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
|
3798
3810
|
image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
|
3799
3811
|
image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
|
3800
|
-
# image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
|
3801
3812
|
image.save(image_file)
|
3802
|
-
# image_np = numpy.array(image)
|
3803
|
-
# return self.reader.readtext(image_np,**opts)
|
3804
|
-
# else:
|
3805
3813
|
text=self.reader.readtext(image_file,**opts)
|
3806
3814
|
if output is str:
|
3807
3815
|
return ' '.join(text)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|