kmisc 2.1.125__py3-none-any.whl → 2.1.126__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kmisc/__init__.py +25 -19
- {kmisc-2.1.125.dist-info → kmisc-2.1.126.dist-info}/METADATA +1 -1
- kmisc-2.1.126.dist-info/RECORD +6 -0
- kmisc-2.1.125.dist-info/RECORD +0 -6
- {kmisc-2.1.125.dist-info → kmisc-2.1.126.dist-info}/LICENSE +0 -0
- {kmisc-2.1.125.dist-info → kmisc-2.1.126.dist-info}/WHEEL +0 -0
- {kmisc-2.1.125.dist-info → kmisc-2.1.126.dist-info}/top_level.txt +0 -0
kmisc/__init__.py
CHANGED
@@ -3462,12 +3462,15 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3462
3462
|
# : id : username/password with name id
|
3463
3463
|
#auth_fields.auth.name : (username,password) : username/password field string for name or id
|
3464
3464
|
#next_do : put data and click submit
|
3465
|
-
|
3466
3465
|
_url=url.split('/')
|
3467
3466
|
url=WEB().url_join(*_url[1:],method=_url[0])
|
3468
3467
|
if isinstance(image_size,str):
|
3469
3468
|
if 'x' in image_size:
|
3470
3469
|
image_size=image_size.split('x')
|
3470
|
+
elif 'X' in image_size:
|
3471
|
+
image_size=image_size.split('X')
|
3472
|
+
elif '*' in image_size:
|
3473
|
+
image_size=image_size.split('*')
|
3471
3474
|
elif ',' in image_size:
|
3472
3475
|
image_size=image_size.split(',')
|
3473
3476
|
elif '*' in image_size:
|
@@ -3477,9 +3480,10 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3477
3480
|
if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
|
3478
3481
|
image_size=','.join([str(i) for i in image_size])
|
3479
3482
|
else:
|
3480
|
-
if not IsIn(image_size,[
|
3481
|
-
|
3482
|
-
|
3483
|
+
if not IsIn(image_size,['full','fullscreen','full_screen','full_size']):
|
3484
|
+
if IsIn(image_size,[None,'auto']):
|
3485
|
+
#Set it to default image size
|
3486
|
+
image_size='1920,1080'
|
3483
3487
|
|
3484
3488
|
if Import('import selenium'):
|
3485
3489
|
return False,'Can not install selenium package'
|
@@ -3502,8 +3506,9 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3502
3506
|
chrome_options.add_argument('--headless') # Run in headless mode
|
3503
3507
|
chrome_options.add_argument('--no-sandbox')
|
3504
3508
|
chrome_options.add_argument('--disable-dev-shm-usage')
|
3509
|
+
chrome_options.add_argument('--disable-extensions')
|
3505
3510
|
|
3506
|
-
if image_size.lower() not in ['full','fullscreen','full_screen','
|
3511
|
+
if image_size.lower() not in ['full','fullscreen','full_screen','full_size']:
|
3507
3512
|
chrome_options.add_argument(f"--window-size={image_size}") # Set window size
|
3508
3513
|
if not gpu:
|
3509
3514
|
chrome_options.add_argument("--disable-gpu")
|
@@ -3512,15 +3517,18 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
|
|
3512
3517
|
chrome_options.add_argument('--allow-insecure-localhost') # gnore-certificate-errors
|
3513
3518
|
# Initialize the Chrome driver
|
3514
3519
|
driver = selenium.webdriver.Chrome(options=chrome_options)
|
3515
|
-
if image_size.lower() in ['full','fullscreen','full_screen','
|
3520
|
+
if image_size.lower() in ['full','fullscreen','full_screen','full_size']:
|
3516
3521
|
#original_size = driver.get_window_size()
|
3517
3522
|
#full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
|
3518
3523
|
#full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
|
3519
3524
|
#driver.set_window_size(full_width, full_height)
|
3520
|
-
# code save screenshot
|
3521
|
-
#driver.set_window_size(original_size['width'], original_size['height']) #restore size
|
3522
3525
|
driver.maximize_window()
|
3523
3526
|
rc=False,output_file
|
3527
|
+
#Cleanup first
|
3528
|
+
if isinstance(output_file,str) and output_file:
|
3529
|
+
if os.path.isfile(output_file):
|
3530
|
+
os.unlink(output_file)
|
3531
|
+
|
3524
3532
|
try:
|
3525
3533
|
# Navigate to the URL
|
3526
3534
|
driver.get(url)
|
@@ -3736,16 +3744,15 @@ class OCR:
|
|
3736
3744
|
Import('easyocr')
|
3737
3745
|
if self.enhance:
|
3738
3746
|
Import('PIL',install_name='Pillow')
|
3739
|
-
|
3740
|
-
self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
|
3741
|
-
# Suppress Torch pin_memory warning
|
3747
|
+
warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
|
3742
3748
|
warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
|
3743
|
-
|
3744
3749
|
# Suppress EasyOCR CPU warning
|
3745
3750
|
#warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
|
3746
3751
|
|
3747
|
-
#
|
3748
|
-
|
3752
|
+
#self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
|
3753
|
+
self.reader = easyocr.Reader(self.language, gpu=self.gpu, detector='dbnet18',
|
3754
|
+
model_storage_directory=self.model_storage_directory,
|
3755
|
+
download_enabled=False)
|
3749
3756
|
|
3750
3757
|
def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
|
3751
3758
|
if not image_file: image_file=self.image_file
|
@@ -3777,7 +3784,7 @@ class OCR:
|
|
3777
3784
|
kernel = np.ones((2, 2), np.uint8)
|
3778
3785
|
cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
3779
3786
|
lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
|
3780
|
-
if not lang or lang
|
3787
|
+
if not lang or IsIn(lang,['en','eng','english']): lang='eng'
|
3781
3788
|
try:
|
3782
3789
|
text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
|
3783
3790
|
except:
|
@@ -3789,6 +3796,9 @@ class OCR:
|
|
3789
3796
|
else:
|
3790
3797
|
opts={}
|
3791
3798
|
opts['detail']=detail
|
3799
|
+
opts['batch_size']=2
|
3800
|
+
opts['contrast_ths']=0.3
|
3801
|
+
opts['allowlist']='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:/-_@,"+'
|
3792
3802
|
if isinstance(low_text,float): opts['low_text']=low_test
|
3793
3803
|
if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
|
3794
3804
|
if self.enhance:
|
@@ -3797,11 +3807,7 @@ class OCR:
|
|
3797
3807
|
image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
|
3798
3808
|
image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
|
3799
3809
|
image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
|
3800
|
-
# image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
|
3801
3810
|
image.save(image_file)
|
3802
|
-
# image_np = numpy.array(image)
|
3803
|
-
# return self.reader.readtext(image_np,**opts)
|
3804
|
-
# else:
|
3805
3811
|
text=self.reader.readtext(image_file,**opts)
|
3806
3812
|
if output is str:
|
3807
3813
|
return ' '.join(text)
|
@@ -0,0 +1,6 @@
|
|
1
|
+
kmisc/__init__.py,sha256=9Tr1tEZPHJ7CQXPJz6bjoEwW6IEOGIl1VBrBzgTjASo,164077
|
2
|
+
kmisc-2.1.126.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
+
kmisc-2.1.126.dist-info/METADATA,sha256=K7bGVkNJcPRqIiZeOxUUUskLANnK3p1J2agHVDBpGk8,5523
|
4
|
+
kmisc-2.1.126.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
+
kmisc-2.1.126.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
+
kmisc-2.1.126.dist-info/RECORD,,
|
kmisc-2.1.125.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
kmisc/__init__.py,sha256=clNYHP2yuY6LqEG9lobGvdMyIA3EocDSsTJUfZTIs9U,163729
|
2
|
-
kmisc-2.1.125.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
-
kmisc-2.1.125.dist-info/METADATA,sha256=7na-3ZkC58XtrxZtn77ONgxR39jKufWqZLdEjo0dZxs,5523
|
4
|
-
kmisc-2.1.125.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
-
kmisc-2.1.125.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
-
kmisc-2.1.125.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|