kmisc 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kmisc/__init__.py +190 -68
- {kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/METADATA +1 -1
- kmisc-2.1.124.dist-info/RECORD +6 -0
- kmisc-2.1.122.dist-info/RECORD +0 -6
- {kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/LICENSE +0 -0
- {kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/WHEEL +0 -0
- {kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/top_level.txt +0 -0
kmisc/__init__.py
CHANGED
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
|
|
3453
3453
|
if default in ['org',{'org'}]: return src
|
3454
3454
|
return default
|
3455
3455
|
|
3456
|
-
def web_capture(url,output_file,image_size='
|
3456
|
+
def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
|
3457
3457
|
#auth_fields.submit.type : name : login button with name
|
3458
3458
|
# : id : login button with id
|
3459
3459
|
# : submit : submit button without name or id
|
@@ -3463,6 +3463,8 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3463
3463
|
#auth_fields.auth.name : (username,password) : username/password field string for name or id
|
3464
3464
|
#next_do : put data and click submit
|
3465
3465
|
|
3466
|
+
_url=url.split('/')
|
3467
|
+
url=WEB().url_join(*_url[1:],method=_url[0])
|
3466
3468
|
if isinstance(image_size,str):
|
3467
3469
|
if 'x' in image_size:
|
3468
3470
|
image_size=image_size.split('x')
|
@@ -3475,8 +3477,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3475
3477
|
if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
|
3476
3478
|
image_size=','.join([str(i) for i in image_size])
|
3477
3479
|
else:
|
3478
|
-
|
3479
|
-
|
3480
|
+
if not IsIn(image_size,[None,'full','fullscreen','full_screen','auto']):
|
3481
|
+
#Set it to default image size
|
3482
|
+
image_size='1920,1080'
|
3480
3483
|
|
3481
3484
|
if Import('import selenium'):
|
3482
3485
|
return False,'Can not install selenium package'
|
@@ -3484,10 +3487,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3484
3487
|
if backup:
|
3485
3488
|
Import('filecmp')
|
3486
3489
|
Import('shutil')
|
3490
|
+
if capture_type in ['mov','mp4']:
|
3491
|
+
print('????load cv2')
|
3492
|
+
Import('cv2',install_name='opencv-python')
|
3487
3493
|
|
3488
3494
|
ocr=None
|
3489
3495
|
if capture_method != 'file':
|
3490
|
-
ocr=OCR(enhance=ocr_enhance)
|
3496
|
+
ocr=OCR(enhance=ocr_enhance,module=ocr_module)
|
3491
3497
|
# Configure Chrome options for headless mode
|
3492
3498
|
from selenium.webdriver.chrome.options import Options
|
3493
3499
|
from selenium.webdriver.common.by import By
|
@@ -3497,7 +3503,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3497
3503
|
chrome_options.add_argument('--headless') # Run in headless mode
|
3498
3504
|
chrome_options.add_argument('--no-sandbox')
|
3499
3505
|
chrome_options.add_argument('--disable-dev-shm-usage')
|
3500
|
-
|
3506
|
+
|
3507
|
+
if image_size.lower() not in ['full','fullscreen','full_screen','auto']:
|
3508
|
+
chrome_options.add_argument(f"--window-size={image_size}") # Set window size
|
3501
3509
|
if not gpu:
|
3502
3510
|
chrome_options.add_argument("--disable-gpu")
|
3503
3511
|
if ignore_certificate_error:
|
@@ -3505,6 +3513,14 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3505
3513
|
chrome_options.add_argument('--allow-insecure-localhost') # ignore-certificate-errors
|
3506
3514
|
# Initialize the Chrome driver
|
3507
3515
|
driver = selenium.webdriver.Chrome(options=chrome_options)
|
3516
|
+
if image_size.lower() in ['full','fullscreen','full_screen','auto']:
|
3517
|
+
#original_size = driver.get_window_size()
|
3518
|
+
#full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
|
3519
|
+
#full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
|
3520
|
+
#driver.set_window_size(full_width, full_height)
|
3521
|
+
# code save screenshot
|
3522
|
+
#driver.set_window_size(original_size['width'], original_size['height']) #restore size
|
3523
|
+
driver.maximize_window()
|
3508
3524
|
rc=False,output_file
|
3509
3525
|
try:
|
3510
3526
|
# Navigate to the URL
|
@@ -3559,7 +3575,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3559
3575
|
next_do_button = driver.find_element(By.XPATH, "//button[@type='submit']")
|
3560
3576
|
next_do_button.click()
|
3561
3577
|
|
3562
|
-
def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon):
|
3578
|
+
def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file):
|
3563
3579
|
def wait_body(driver,timeout=10):
|
3564
3580
|
#wait until get screen data
|
3565
3581
|
try:
|
@@ -3578,23 +3594,37 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3578
3594
|
time.sleep(timeout)
|
3579
3595
|
|
3580
3596
|
live_capture=Int(live_capture)
|
3581
|
-
|
3597
|
+
if not isinstance(wait_time,(int,float)):
|
3598
|
+
wait_time=Int(wait_time,10)
|
3582
3599
|
backup_idx=0
|
3583
3600
|
if backup:
|
3584
3601
|
backup=Int(backup,2)
|
3585
3602
|
if isinstance(live_capture,int) and live_capture > wait_time*2:
|
3603
|
+
#Keep capture
|
3586
3604
|
Time=TIME()
|
3605
|
+
if capture_type in ['mov','mp4']:
|
3606
|
+
frame_rate=1/wait_time
|
3607
|
+
window_size = driver.get_window_size()
|
3608
|
+
width = window_size['width']
|
3609
|
+
height = window_size['height']
|
3610
|
+
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4
|
3611
|
+
video_writer = cv2.VideoWriter(video_file, fourcc, frame_rate, (width, height))
|
3587
3612
|
while True:
|
3588
3613
|
if Time.Out(live_capture):
|
3589
3614
|
driver.quit()
|
3590
|
-
|
3615
|
+
if capture_type in ['mov','mp4']:
|
3616
|
+
video_writer.release()
|
3617
|
+
return True
|
3618
|
+
else:
|
3619
|
+
#Do something, but not return until timeout. So return False
|
3620
|
+
return False
|
3591
3621
|
# Capture screenshot
|
3592
3622
|
if log:
|
3593
|
-
if log
|
3623
|
+
if IsIn(log,['screen','log','print',print]):
|
3594
3624
|
printf(Dot(),direct=True)
|
3595
3625
|
else:
|
3596
3626
|
printf(Dot(),log=log,direct=True)
|
3597
|
-
if backup:
|
3627
|
+
if backup and capture_type not in ['mov','mp4']:
|
3598
3628
|
save_file='{}.{}'.format(output_file,backup_idx%backup)
|
3599
3629
|
else:
|
3600
3630
|
save_file=output_file
|
@@ -3602,43 +3632,85 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3602
3632
|
wait_body(driver,timeout=wait_time)
|
3603
3633
|
#capture
|
3604
3634
|
driver.save_screenshot(save_file)
|
3605
|
-
if
|
3606
|
-
|
3607
|
-
|
3608
|
-
|
3609
|
-
|
3610
|
-
|
3611
|
-
|
3612
|
-
|
3613
|
-
|
3614
|
-
|
3615
|
-
|
3616
|
-
|
3617
|
-
|
3618
|
-
|
3619
|
-
|
3620
|
-
|
3621
|
-
|
3622
|
-
|
3623
|
-
|
3624
|
-
|
3625
|
-
|
3626
|
-
|
3627
|
-
|
3635
|
+
if capture_type in ['mov','mp4']:
|
3636
|
+
#Make a video file
|
3637
|
+
frame = cv2.imread(save_file)
|
3638
|
+
frame = cv2.resize(frame, (width, height))
|
3639
|
+
video_writer.write(frame)
|
3640
|
+
else:
|
3641
|
+
#do something with picture file
|
3642
|
+
if backup:
|
3643
|
+
if backup == 2:
|
3644
|
+
comp_a=f'{output_file}.0'
|
3645
|
+
comp_b=f'{output_file}.1'
|
3646
|
+
if os.path.isfile(comp_a) and os.path.isfile(comp_b):
|
3647
|
+
if filecmp.cmp(comp_a,comp_b):
|
3648
|
+
if log:
|
3649
|
+
if IsIn(log,['screen','log','print',print]):
|
3650
|
+
printf(Dot(),direct=True)
|
3651
|
+
else:
|
3652
|
+
printf(Dot(),log=log,direct=True)
|
3653
|
+
time.sleep(wait_time)
|
3654
|
+
backup_idx+=1
|
3655
|
+
continue
|
3656
|
+
shutil.copy2(save_file,output_file)
|
3657
|
+
if IsIn(capture_method,['log','screen','text']):
|
3658
|
+
found_words=ocr.Text(image_file=save_file)
|
3659
|
+
found_strings=found_space.join(found_words)
|
3660
|
+
if IsIn(log,['screen','log','print',print,None]):
|
3661
|
+
printf(found_strings,mode='s')
|
3662
|
+
else:
|
3663
|
+
printf(found_strings,log=log,mode='d')
|
3664
|
+
if find_string:
|
3665
|
+
if not isinstance(find_string,list):
|
3666
|
+
find_string=[find_string]
|
3667
|
+
for ff in find_string:
|
3668
|
+
if ff in found_strings:
|
3669
|
+
#Found the exit string, so stop capturing and return True
|
3670
|
+
driver.quit()
|
3671
|
+
return True
|
3672
|
+
backup_idx+=1
|
3673
|
+
#capture interval
|
3628
3674
|
time.sleep(wait_time)
|
3629
|
-
backup_idx+=1
|
3630
3675
|
else:
|
3676
|
+
#Single capture
|
3631
3677
|
#wait
|
3632
|
-
wait_body(driver,timeout=wait_time)
|
3678
|
+
#wait_body(driver,timeout=wait_time)
|
3679
|
+
time.sleep(wait_time)
|
3633
3680
|
#capture
|
3634
3681
|
driver.save_screenshot(output_file)
|
3635
|
-
|
3682
|
+
if IsIn(capture_method,['log','screen','text']):
|
3683
|
+
found_words=ocr.Text(image_file=output_file)
|
3684
|
+
found_strings=found_space.join(found_words)
|
3685
|
+
if IsIn(log,['screen','log','print',print,None]):
|
3686
|
+
printf(found_strings,mode='s')
|
3687
|
+
else:
|
3688
|
+
printf(found_strings,log=log,mode='d')
|
3689
|
+
if find_string:
|
3690
|
+
if find_string in found_strings:
|
3691
|
+
driver.quit()
|
3692
|
+
return True
|
3693
|
+
driver.quit()
|
3694
|
+
if IsIn(capture_method,['text']):
|
3695
|
+
return found_strings
|
3696
|
+
return True
|
3697
|
+
|
3698
|
+
##
|
3699
|
+
if IsIn(capture_type,['mov','mp4']):
|
3700
|
+
if not video_file:
|
3701
|
+
video_file='{}.mp4'.format('.'.join(output_file.split('.')[:-1]))
|
3702
|
+
|
3703
|
+
#Background running
|
3636
3704
|
if daemon:
|
3637
|
-
t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon))
|
3705
|
+
t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
|
3638
3706
|
return t
|
3639
3707
|
else:
|
3640
|
-
|
3641
|
-
|
3708
|
+
#Single process running
|
3709
|
+
rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
|
3710
|
+
if IsIn(capture_type,['mov','mp4']):
|
3711
|
+
return rc,video_file
|
3712
|
+
else:
|
3713
|
+
return rc,output_file
|
3642
3714
|
|
3643
3715
|
except Exception as e:
|
3644
3716
|
#print(f"Error capturing screenshot: {str(e)}")
|
@@ -3650,42 +3722,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
|
|
3650
3722
|
# return rc
|
3651
3723
|
|
3652
3724
|
class OCR:
    """Extract text from an image file with EasyOCR (default) or pytesseract.

    The backend is chosen once at construction time via ``ocr_module``
    ('pytesseract' selects Tesseract, anything else selects EasyOCR).
    ``Import`` is a helper defined elsewhere in this file; it is presumably
    responsible for installing/loading the named package into module scope
    (TODO confirm against its definition).
    """

    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
        """Load the selected OCR backend.

        image_file: default image path used by Text() when none is passed.
        enhance: for the EasyOCR backend, pre-process the image with PIL
            (grayscale, contrast, sharpen, threshold) before reading.
        language: language code(s); handed to easyocr.Reader as-is, and the
            first entry is used as the Tesseract language.
        gpu, model_storage_directory: forwarded to easyocr.Reader.
        ocr_module: 'easyocr' (default) or 'pytesseract'.
        opts: ``module`` is accepted as an alias of ``ocr_module`` because
            web_capture() constructs this class with ``module=...``;
            other extra keywords are ignored.
        """
        self.enhance=enhance
        self.image_file=image_file
        # Honor the 'module' alias so the caller's backend choice is not
        # silently dropped into **opts.
        self.ocr_module=opts.get('module') or ocr_module
        # Copy a list argument so the shared default list is never aliased.
        self.language=list(language) if isinstance(language,list) else language
        self.gpu=gpu
        self.model_storage_directory=model_storage_directory
        if self.ocr_module == 'pytesseract':
            Import('pytesseract')
            Import('import numpy as np')
            Import('cv2',install_name='opencv-python')
        else:
            Import('easyocr')
            if self.enhance:
                Import('PIL',install_name='Pillow')
                Import('numpy')
            self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
            # Suppress Torch pin_memory warning
            warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")

            # Suppress NetworkX backend warning
            warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")

    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
        """Run OCR on an image file and return the recognized text.

        detail, low_text, contrast_ths: forwarded to easyocr's readtext()
            (low_text/contrast_ths only when given as float); unused by the
            pytesseract backend.
        image_file: image path; falls back to the path given to __init__.
        output: ``str`` (default) returns one string; any other value
            returns a list (whitespace-split words for pytesseract, the raw
            readtext() result for EasyOCR).

        Returns False when no image path is available, the path is not an
        existing file, or (pytesseract backend) the image cannot be decoded.
        """
        if not image_file: image_file=self.image_file
        if not image_file: return False
        if not os.path.isfile(image_file): return False

        if self.ocr_module == 'pytesseract':
            image = cv2.imread(image_file)
            # cv2.imread() returns None instead of raising on unreadable files
            if image is None: return False
            # Convert to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Invert the image to make text black on white (Tesseract prefers this)
            inverted = cv2.bitwise_not(gray)

            # Light noise reduction with small Gaussian blur (fast)
            blurred = cv2.GaussianBlur(inverted, (3, 3), 0)

            # CLAHE for local contrast
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(blurred)

            # Small upscale (1.5x) for better DPI without heavy computation
            scale_factor = 1.5
            resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) # LINEAR is faster than CUBIC

            # Adaptive thresholding for varying console text quality
            thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

            # Minimal morphological cleanup (small kernel for speed)
            kernel = np.ones((2, 2), np.uint8)
            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)

            # First configured language; an empty value or 'en' maps to 'eng'
            if isinstance(self.language,(list,tuple)):
                lang=self.language[0] if self.language else None
            else:
                lang=self.language
            if not lang or lang == 'en': lang='eng'
            try:
                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
            except Exception:
                # Best effort: retry with English if the requested language fails
                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
            if output is str:
                return text
            return text.split()

        # EasyOCR backend
        opts={'detail':detail}
        if isinstance(low_text,float): opts['low_text']=low_text
        if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
        if self.enhance:
            # NOTE: the enhanced image is written back over image_file
            image = PIL.Image.open(image_file)
            image = image.convert('L') #Grayscale
            image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
            image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
            image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
            image.save(image_file)
        text=self.reader.readtext(image_file,**opts)
        if output is str:
            # With detail=0 readtext() yields plain strings; otherwise each
            # entry is (bbox, text, confidence), so take the text field.
            return ' '.join(item if isinstance(item,str) else str(item[1]) for item in text)
        return text
|
3689
3811
|
|
3690
3812
|
############################################
|
3691
3813
|
#Temporary function map for replacement
|
@@ -0,0 +1,6 @@
|
|
1
|
+
kmisc/__init__.py,sha256=9KNDkVxjqk5_f1h8TRJMwItMXSZFyXBAzwK4MKR-6aM,163758
|
2
|
+
kmisc-2.1.124.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
+
kmisc-2.1.124.dist-info/METADATA,sha256=0Jeai7LsZye2IPSLxdVv8EfoU37IpRSJYJb5D4J1io8,5523
|
4
|
+
kmisc-2.1.124.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
+
kmisc-2.1.124.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
+
kmisc-2.1.124.dist-info/RECORD,,
|
kmisc-2.1.122.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
kmisc/__init__.py,sha256=nX41gPkw15kBnuav_Xmh11ZBdVZKRJq7OT13qs2Vjn0,156878
|
2
|
-
kmisc-2.1.122.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
|
3
|
-
kmisc-2.1.122.dist-info/METADATA,sha256=-e9Y6YbWJmJ8DT1bcCvIHlwClRtEgMUVHboS5EjxYXw,5523
|
4
|
-
kmisc-2.1.122.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
5
|
-
kmisc-2.1.122.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
|
6
|
-
kmisc-2.1.122.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|