kmisc 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kmisc/__init__.py CHANGED
@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
3453
3453
  if default in ['org',{'org'}]: return src
3454
3454
  return default
3455
3455
 
3456
- def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3456
+ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
3457
3457
  #auth_fields.submit.type : name : login button with name
3458
3458
  # : id : login button with id
3459
3459
  # : submit : submit button without name or id
@@ -3463,6 +3463,8 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3463
3463
  #auth_fields.auth.name : (usernane,password) : username/password field string for name or id
3464
3464
  #next_do : put data and click submit
3465
3465
 
3466
+ _url=url.split('/')
3467
+ url=WEB().url_join(*_url[1:],method=_url[0])
3466
3468
  if isinstance(image_size,str):
3467
3469
  if 'x' in image_size:
3468
3470
  image_size=image_size.split('x')
@@ -3475,8 +3477,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3475
3477
  if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
3476
3478
  image_size=','.join([str(i) for i in image_size])
3477
3479
  else:
3478
- #Set it to default image size
3479
- image_size='1920,1080'
3480
+ if not IsIn(image_size,[None,'full','fullscreen','full_screen','auto']):
3481
+ #Set it to default image size
3482
+ image_size='1920,1080'
3480
3483
 
3481
3484
  if Import('import selenium'):
3482
3485
  return False,'Can not install selenium package'
@@ -3484,10 +3487,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3484
3487
  if backup:
3485
3488
  Import('filecmp')
3486
3489
  Import('shutil')
3490
+ if capture_type in ['mov','mp4']:
3491
+ print('????load cv2')
3492
+ Import('cv2',install_name='opencv-python')
3487
3493
 
3488
3494
  ocr=None
3489
3495
  if capture_method != 'file':
3490
- ocr=OCR(enhance=ocr_enhance)
3496
+ ocr=OCR(enhance=ocr_enhance,module=ocr_module)
3491
3497
  # Configure Chrome options for headless mode
3492
3498
  from selenium.webdriver.chrome.options import Options
3493
3499
  from selenium.webdriver.common.by import By
@@ -3497,7 +3503,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3497
3503
  chrome_options.add_argument('--headless') # Run in headless mode
3498
3504
  chrome_options.add_argument('--no-sandbox')
3499
3505
  chrome_options.add_argument('--disable-dev-shm-usage')
3500
- chrome_options.add_argument(f"--window-size={image_size}") # Set window size
3506
+
3507
+ if image_size.lower() not in ['full','fullscreen','full_screen','auto']:
3508
+ chrome_options.add_argument(f"--window-size={image_size}") # Set window size
3501
3509
  if not gpu:
3502
3510
  chrome_options.add_argument("--disable-gpu")
3503
3511
  if ignore_certificate_error:
@@ -3505,6 +3513,14 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3505
3513
  chrome_options.add_argument('--allow-insecure-localhost') # gnore-certificate-errors
3506
3514
  # Initialize the Chrome driver
3507
3515
  driver = selenium.webdriver.Chrome(options=chrome_options)
3516
+ if image_size.lower() in ['full','fullscreen','full_screen','auto']:
3517
+ #original_size = driver.get_window_size()
3518
+ #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
3519
+ #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
3520
+ #driver.set_window_size(full_width, full_height)
3521
+ # code save screenshot
3522
+ #driver.set_window_size(original_size['width'], original_size['height']) #restore size
3523
+ driver.maximize_window()
3508
3524
  rc=False,output_file
3509
3525
  try:
3510
3526
  # Navigate to the URL
@@ -3559,7 +3575,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3559
3575
  next_do_button = driver.find_element(By.XPATH, "//button[@type='submit']")
3560
3576
  next_do_button.click()
3561
3577
 
3562
- def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon):
3578
+ def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file):
3563
3579
  def wait_body(driver,timeout=10):
3564
3580
  #wait until get screen data
3565
3581
  try:
@@ -3578,23 +3594,37 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3578
3594
  time.sleep(timeout)
3579
3595
 
3580
3596
  live_capture=Int(live_capture)
3581
- wait_time=Int(wait_time,10)
3597
+ if not isinstance(wait_time,(int,float)):
3598
+ wait_time=Int(wait_time,10)
3582
3599
  backup_idx=0
3583
3600
  if backup:
3584
3601
  backup=Int(backup,2)
3585
3602
  if isinstance(live_capture,int) and live_capture > wait_time*2:
3603
+ #Keep capture
3586
3604
  Time=TIME()
3605
+ if capture_type in ['mov','mp4']:
3606
+ frame_rate=1/wait_time
3607
+ window_size = driver.get_window_size()
3608
+ width = window_size['width']
3609
+ height = window_size['height']
3610
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4
3611
+ video_writer = cv2.VideoWriter(video_file, fourcc, frame_rate, (width, height))
3587
3612
  while True:
3588
3613
  if Time.Out(live_capture):
3589
3614
  driver.quit()
3590
- return False
3615
+ if capture_type in ['mov','mp4']:
3616
+ video_writer.release()
3617
+ return True
3618
+ else:
3619
+ #Do something, but not return until timeout. So return False
3620
+ return False
3591
3621
  # Capture screenshot
3592
3622
  if log:
3593
- if log in ['screen','log','print',print]:
3623
+ if IsIn(log,['screen','log','print',print]):
3594
3624
  printf(Dot(),direct=True)
3595
3625
  else:
3596
3626
  printf(Dot(),log=log,direct=True)
3597
- if backup:
3627
+ if backup and capture_type not in ['mov','mp4']:
3598
3628
  save_file='{}.{}'.format(output_file,backup_idx%backup)
3599
3629
  else:
3600
3630
  save_file=output_file
@@ -3602,43 +3632,85 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3602
3632
  wait_body(driver,timeout=wait_time)
3603
3633
  #capture
3604
3634
  driver.save_screenshot(save_file)
3605
- if backup:
3606
- if backup == 2:
3607
- comp_a=f'{output_file}.0'
3608
- comp_b=f'{output_file}.1'
3609
- if os.path.isfile(comp_a) and os.path.isfile(comp_b):
3610
- if filecmp.cmp(comp_a,comp_b):
3611
- if log:
3612
- if log in ['screen','log','print',print]:
3613
- printf(Dot(),direct=True)
3614
- else:
3615
- printf(Dot(),log=log,direct=True)
3616
- time.sleep(wait_time)
3617
- backup_idx+=1
3618
- continue
3619
- shutil.copy2(save_file,output_file)
3620
- if IsIn(capture_method,['log','screen','text']):
3621
- found_words=ocr.Text(image_file=save_file)
3622
- found_strings=found_space.join(found_words)
3623
- printf(found_strings,log=log,mode='d' if log else 's')
3624
- if find_string:
3625
- if find_string in found_strings:
3626
- driver.quit()
3627
- return True
3635
+ if capture_type in ['mov','mp4']:
3636
+ #Make a video file
3637
+ frame = cv2.imread(save_file)
3638
+ frame = cv2.resize(frame, (width, height))
3639
+ video_writer.write(frame)
3640
+ else:
3641
+ #do something with picture file
3642
+ if backup:
3643
+ if backup == 2:
3644
+ comp_a=f'{output_file}.0'
3645
+ comp_b=f'{output_file}.1'
3646
+ if os.path.isfile(comp_a) and os.path.isfile(comp_b):
3647
+ if filecmp.cmp(comp_a,comp_b):
3648
+ if log:
3649
+ if IsIn(log,['screen','log','print',print]):
3650
+ printf(Dot(),direct=True)
3651
+ else:
3652
+ printf(Dot(),log=log,direct=True)
3653
+ time.sleep(wait_time)
3654
+ backup_idx+=1
3655
+ continue
3656
+ shutil.copy2(save_file,output_file)
3657
+ if IsIn(capture_method,['log','screen','text']):
3658
+ found_words=ocr.Text(image_file=save_file)
3659
+ found_strings=found_space.join(found_words)
3660
+ if IsIn(log,['screen','log','print',print,None]):
3661
+ printf(found_strings,mode='s')
3662
+ else:
3663
+ printf(found_strings,log=log,mode='d')
3664
+ if find_string:
3665
+ if not isinstance(find_string,list):
3666
+ find_string=[find_string]
3667
+ for ff in find_string:
3668
+ if ff in found_strings:
3669
+ #Find exit string, So True, So True, So True, So True
3670
+ driver.quit()
3671
+ return True
3672
+ backup_idx+=1
3673
+ #capture interval
3628
3674
  time.sleep(wait_time)
3629
- backup_idx+=1
3630
3675
  else:
3676
+ #Single capture
3631
3677
  #wait
3632
- wait_body(driver,timeout=wait_time)
3678
+ #wait_body(driver,timeout=wait_time)
3679
+ time.sleep(wait_time)
3633
3680
  #capture
3634
3681
  driver.save_screenshot(output_file)
3635
- driver.quit()
3682
+ if IsIn(capture_method,['log','screen','text']):
3683
+ found_words=ocr.Text(image_file=output_file)
3684
+ found_strings=found_space.join(found_words)
3685
+ if IsIn(log,['screen','log','print',print,None]):
3686
+ printf(found_strings,mode='s')
3687
+ else:
3688
+ printf(found_strings,log=log,mode='d')
3689
+ if find_string:
3690
+ if find_string in found_strings:
3691
+ driver.quit()
3692
+ return True
3693
+ driver.quit()
3694
+ if IsIn(capture_method,['text']):
3695
+ return found_strings
3696
+ return True
3697
+
3698
+ ##
3699
+ if IsIn(capture_type,['mov','mp4']):
3700
+ if not video_file:
3701
+ video_file='{}.mp4'.format('.'.join(output_file.split('.')[:-1]))
3702
+
3703
+ #Background running
3636
3704
  if daemon:
3637
- t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon))
3705
+ t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
3638
3706
  return t
3639
3707
  else:
3640
- _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon)
3641
- return True,output_file
3708
+ #Single process running
3709
+ rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
3710
+ if IsIn(capture_type,['mov','mp4']):
3711
+ return rc,video_file
3712
+ else:
3713
+ return rc,output_file
3642
3714
 
3643
3715
  except Exception as e:
3644
3716
  #print(f"Error capturing screenshot: {str(e)}")
@@ -3650,42 +3722,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
3650
3722
  # return rc
3651
3723
 
3652
3724
  class OCR:
3653
- def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
3725
+ def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
3654
3726
  self.enhance=enhance
3655
3727
  self.image_file=image_file
3656
- Import('easyocr')
3657
- if self.enhance:
3658
- Import('PIL',install_name='Pillow')
3659
- Import('numpy')
3660
- self.reader = easyocr.Reader(language,gpu=gpu,model_storage_directory=model_storage_directory)
3661
- # Suppress Torch pin_memory warning
3662
- warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3728
+ self.ocr_module=ocr_module
3729
+ self.language=language
3730
+ self.gpu=gpu
3731
+ self.model_storage_directory=model_storage_directory
3732
+ if self.ocr_module == 'pytesseract':
3733
+ Import('pytesseract')
3734
+ Import('import numpy as np')
3735
+ Import('cv2',install_name='opencv-python')
3736
+ else:
3737
+ Import('easyocr')
3738
+ if self.enhance:
3739
+ Import('PIL',install_name='Pillow')
3740
+ Import('numpy')
3741
+ self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3742
+ # Suppress Torch pin_memory warning
3743
+ warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3663
3744
 
3664
- # Suppress EasyOCR CPU warning
3665
- #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3745
+ # Suppress EasyOCR CPU warning
3746
+ #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3666
3747
 
3667
- # Suppress NetworkX backend warning
3668
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3748
+ # Suppress NetworkX backend warning
3749
+ warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3669
3750
 
3670
- def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
3751
+ def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
3671
3752
  if not image_file: image_file=self.image_file
3672
3753
  if not image_file: return False
3673
- opts={}
3674
- opts['detail']=detail
3675
- if isinstance(low_text,float): opts['low_text']=low_test
3676
- if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3677
- if self.enhance:
3678
- image = PIL.Image.open(image_file)
3679
- image = image.convert('L') #Grayscale
3680
- image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3681
- image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3682
- image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3683
- # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3684
- image.save(image_file)
3685
- # image_np = numpy.array(image)
3686
- # return self.reader.readtext(image_np,**opts)
3687
- # else:
3688
- return self.reader.readtext(image_file,**opts)
3754
+ if not os.path.isfile(image_file): return False
3755
+ if self.ocr_module == 'pytesseract':
3756
+ image = cv2.imread(image_file)
3757
+ # Convert to grayscale
3758
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
3759
+
3760
+ # Invert the image to make text black on white (Tesseract prefers this)
3761
+ inverted = cv2.bitwise_not(gray)
3762
+
3763
+ # Light noise reduction with small Gaussian blur (fast)
3764
+ blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
3765
+
3766
+ # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
3767
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
3768
+ enhanced = clahe.apply(blurred)
3769
+
3770
+ # Small upscale (1.5x) for better DPI without heavy computation
3771
+ scale_factor = 1.5
3772
+ resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) # LINEAR is faster than CUBIC
3773
+
3774
+ # Adaptive thresholding for varying console text quality
3775
+ thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
3776
+
3777
+ # Minimal morphological cleanup (small kernel for speed)
3778
+ kernel = np.ones((2, 2), np.uint8)
3779
+ cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
3780
+ lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
3781
+ if not lang or lang == 'en': lang='eng'
3782
+ try:
3783
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
3784
+ except:
3785
+ text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
3786
+ if output is str:
3787
+ return text
3788
+ else:
3789
+ return text.split()
3790
+ else:
3791
+ opts={}
3792
+ opts['detail']=detail
3793
+ if isinstance(low_text,float): opts['low_text']=low_test
3794
+ if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3795
+ if self.enhance:
3796
+ image = PIL.Image.open(image_file)
3797
+ image = image.convert('L') #Grayscale
3798
+ image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3799
+ image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3800
+ image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3801
+ # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3802
+ image.save(image_file)
3803
+ # image_np = numpy.array(image)
3804
+ # return self.reader.readtext(image_np,**opts)
3805
+ # else:
3806
+ text=self.reader.readtext(image_file,**opts)
3807
+ if output is str:
3808
+ return ' '.join(text)
3809
+ else:
3810
+ return text
3689
3811
 
3690
3812
  ############################################
3691
3813
  #Temporary function map for replacement
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.122
3
+ Version: 2.1.124
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
@@ -0,0 +1,6 @@
1
+ kmisc/__init__.py,sha256=9KNDkVxjqk5_f1h8TRJMwItMXSZFyXBAzwK4MKR-6aM,163758
2
+ kmisc-2.1.124.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
+ kmisc-2.1.124.dist-info/METADATA,sha256=0Jeai7LsZye2IPSLxdVv8EfoU37IpRSJYJb5D4J1io8,5523
4
+ kmisc-2.1.124.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
+ kmisc-2.1.124.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
+ kmisc-2.1.124.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- kmisc/__init__.py,sha256=nX41gPkw15kBnuav_Xmh11ZBdVZKRJq7OT13qs2Vjn0,156878
2
- kmisc-2.1.122.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
- kmisc-2.1.122.dist-info/METADATA,sha256=-e9Y6YbWJmJ8DT1bcCvIHlwClRtEgMUVHboS5EjxYXw,5523
4
- kmisc-2.1.122.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
- kmisc-2.1.122.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
- kmisc-2.1.122.dist-info/RECORD,,