kmisc 2.1.125__tar.gz → 2.1.127__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.125
3
+ Version: 2.1.127
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
@@ -3462,12 +3462,15 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3462
3462
  # : id : username/password with name id
3463
3463
  #auth_fields.auth.name : (username,password) : username/password field string for name or id
3464
3464
  #next_do : put data and click submit
3465
-
3466
3465
  _url=url.split('/')
3467
3466
  url=WEB().url_join(*_url[1:],method=_url[0])
3468
3467
  if isinstance(image_size,str):
3469
3468
  if 'x' in image_size:
3470
3469
  image_size=image_size.split('x')
3470
+ elif 'X' in image_size:
3471
+ image_size=image_size.split('X')
3472
+ elif '*' in image_size:
3473
+ image_size=image_size.split('*')
3471
3474
  elif ',' in image_size:
3472
3475
  image_size=image_size.split(',')
3473
3476
  elif '*' in image_size:
@@ -3477,9 +3480,10 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3477
3480
  if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
3478
3481
  image_size=','.join([str(i) for i in image_size])
3479
3482
  else:
3480
- if not IsIn(image_size,[None,'full','fullscreen','full_screen','auto']):
3481
- #Set it to default image size
3482
- image_size='1920,1080'
3483
+ if not IsIn(image_size,['full','fullscreen','full_screen','full_size']):
3484
+ if IsIn(image_size,[None,'auto']):
3485
+ #Set it to default image size
3486
+ image_size='1920,1080'
3483
3487
 
3484
3488
  if Import('import selenium'):
3485
3489
  return False,'Can not install selenium package'
@@ -3502,8 +3506,9 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3502
3506
  chrome_options.add_argument('--headless') # Run in headless mode
3503
3507
  chrome_options.add_argument('--no-sandbox')
3504
3508
  chrome_options.add_argument('--disable-dev-shm-usage')
3509
+ chrome_options.add_argument('--disable-extensions')
3505
3510
 
3506
- if image_size.lower() not in ['full','fullscreen','full_screen','auto']:
3511
+ if image_size.lower() not in ['full','fullscreen','full_screen','full_size']:
3507
3512
  chrome_options.add_argument(f"--window-size={image_size}") # Set window size
3508
3513
  if not gpu:
3509
3514
  chrome_options.add_argument("--disable-gpu")
@@ -3512,15 +3517,18 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3512
3517
  chrome_options.add_argument('--allow-insecure-localhost') # gnore-certificate-errors
3513
3518
  # Initialize the Chrome driver
3514
3519
  driver = selenium.webdriver.Chrome(options=chrome_options)
3515
- if image_size.lower() in ['full','fullscreen','full_screen','auto']:
3520
+ if image_size.lower() in ['full','fullscreen','full_screen','full_size']:
3516
3521
  #original_size = driver.get_window_size()
3517
3522
  #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
3518
3523
  #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
3519
3524
  #driver.set_window_size(full_width, full_height)
3520
- # code save screenshot
3521
- #driver.set_window_size(original_size['width'], original_size['height']) #restore size
3522
3525
  driver.maximize_window()
3523
3526
  rc=False,output_file
3527
+ #Cleanup first
3528
+ if isinstance(output_file,str) and output_file:
3529
+ if os.path.isfile(output_file):
3530
+ os.unlink(output_file)
3531
+
3524
3532
  try:
3525
3533
  # Navigate to the URL
3526
3534
  driver.get(url)
@@ -3734,18 +3742,19 @@ class OCR:
3734
3742
  Import('cv2',install_name='opencv-python')
3735
3743
  else:
3736
3744
  Import('easyocr')
3745
+ Import('logging')
3737
3746
  if self.enhance:
3738
3747
  Import('PIL',install_name='Pillow')
3739
- Import('numpy')
3740
- self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3741
- # Suppress Torch pin_memory warning
3748
+ logging.getLogger('easyocr').setLevel(logging.ERROR)
3749
+ warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3742
3750
  warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3743
-
3744
3751
  # Suppress EasyOCR CPU warning
3745
3752
  #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3746
3753
 
3747
- # Suppress NetworkX backend warning
3748
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3754
+ #self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3755
+ self.reader = easyocr.Reader(self.language, gpu=self.gpu, detector='dbnet18',
3756
+ model_storage_directory=self.model_storage_directory,
3757
+ download_enabled=False)
3749
3758
 
3750
3759
  def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
3751
3760
  if not image_file: image_file=self.image_file
@@ -3777,7 +3786,7 @@ class OCR:
3777
3786
  kernel = np.ones((2, 2), np.uint8)
3778
3787
  cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
3779
3788
  lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
3780
- if not lang or lang == 'en': lang='eng'
3789
+ if not lang or IsIn(lang,['en','eng','english']): lang='eng'
3781
3790
  try:
3782
3791
  text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
3783
3792
  except:
@@ -3789,6 +3798,9 @@ class OCR:
3789
3798
  else:
3790
3799
  opts={}
3791
3800
  opts['detail']=detail
3801
+ opts['batch_size']=2
3802
+ opts['contrast_ths']=0.3
3803
+ opts['allowlist']='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:/-_@,"+'
3792
3804
  if isinstance(low_text,float): opts['low_text']=low_test
3793
3805
  if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3794
3806
  if self.enhance:
@@ -3797,11 +3809,7 @@ class OCR:
3797
3809
  image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3798
3810
  image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3799
3811
  image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3800
- # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3801
3812
  image.save(image_file)
3802
- # image_np = numpy.array(image)
3803
- # return self.reader.readtext(image_np,**opts)
3804
- # else:
3805
3813
  text=self.reader.readtext(image_file,**opts)
3806
3814
  if output is str:
3807
3815
  return ' '.join(text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.125
3
+ Version: 2.1.127
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
File without changes
File without changes
File without changes
File without changes
File without changes