kmisc 2.1.125__py3-none-any.whl → 2.1.126__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kmisc/__init__.py CHANGED
@@ -3462,12 +3462,15 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3462
3462
  # : id : username/password with name id
3463
3463
  #auth_fields.auth.name : (username,password) : username/password field string for name or id
3464
3464
  #next_do : put data and click submit
3465
-
3466
3465
  _url=url.split('/')
3467
3466
  url=WEB().url_join(*_url[1:],method=_url[0])
3468
3467
  if isinstance(image_size,str):
3469
3468
  if 'x' in image_size:
3470
3469
  image_size=image_size.split('x')
3470
+ elif 'X' in image_size:
3471
+ image_size=image_size.split('X')
3472
+ elif '*' in image_size:
3473
+ image_size=image_size.split('*')
3471
3474
  elif ',' in image_size:
3472
3475
  image_size=image_size.split(',')
3473
3476
  elif '*' in image_size:
@@ -3477,9 +3480,10 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3477
3480
  if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
3478
3481
  image_size=','.join([str(i) for i in image_size])
3479
3482
  else:
3480
- if not IsIn(image_size,[None,'full','fullscreen','full_screen','auto']):
3481
- #Set it to default image size
3482
- image_size='1920,1080'
3483
+ if not IsIn(image_size,['full','fullscreen','full_screen','full_size']):
3484
+ if IsIn(image_size,[None,'auto']):
3485
+ #Set it to default image size
3486
+ image_size='1920,1080'
3483
3487
 
3484
3488
  if Import('import selenium'):
3485
3489
  return False,'Can not install selenium package'
@@ -3502,8 +3506,9 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3502
3506
  chrome_options.add_argument('--headless') # Run in headless mode
3503
3507
  chrome_options.add_argument('--no-sandbox')
3504
3508
  chrome_options.add_argument('--disable-dev-shm-usage')
3509
+ chrome_options.add_argument('--disable-extensions')
3505
3510
 
3506
- if image_size.lower() not in ['full','fullscreen','full_screen','auto']:
3511
+ if image_size.lower() not in ['full','fullscreen','full_screen','full_size']:
3507
3512
  chrome_options.add_argument(f"--window-size={image_size}") # Set window size
3508
3513
  if not gpu:
3509
3514
  chrome_options.add_argument("--disable-gpu")
@@ -3512,15 +3517,18 @@ def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate
3512
3517
  chrome_options.add_argument('--allow-insecure-localhost') # ignore-certificate-errors
3513
3518
  # Initialize the Chrome driver
3514
3519
  driver = selenium.webdriver.Chrome(options=chrome_options)
3515
- if image_size.lower() in ['full','fullscreen','full_screen','auto']:
3520
+ if image_size.lower() in ['full','fullscreen','full_screen','full_size']:
3516
3521
  #original_size = driver.get_window_size()
3517
3522
  #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
3518
3523
  #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
3519
3524
  #driver.set_window_size(full_width, full_height)
3520
- # code save screenshot
3521
- #driver.set_window_size(original_size['width'], original_size['height']) #restore size
3522
3525
  driver.maximize_window()
3523
3526
  rc=False,output_file
3527
+ #Cleanup first
3528
+ if isinstance(output_file,str) and output_file:
3529
+ if os.path.isfile(output_file):
3530
+ os.unlink(output_file)
3531
+
3524
3532
  try:
3525
3533
  # Navigate to the URL
3526
3534
  driver.get(url)
@@ -3736,16 +3744,15 @@ class OCR:
3736
3744
  Import('easyocr')
3737
3745
  if self.enhance:
3738
3746
  Import('PIL',install_name='Pillow')
3739
- Import('numpy')
3740
- self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3741
- # Suppress Torch pin_memory warning
3747
+ warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3742
3748
  warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
3743
-
3744
3749
  # Suppress EasyOCR CPU warning
3745
3750
  #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
3746
3751
 
3747
- # Suppress NetworkX backend warning
3748
- warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
3752
+ #self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
3753
+ self.reader = easyocr.Reader(self.language, gpu=self.gpu, detector='dbnet18',
3754
+ model_storage_directory=self.model_storage_directory,
3755
+ download_enabled=False)
3749
3756
 
3750
3757
  def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
3751
3758
  if not image_file: image_file=self.image_file
@@ -3777,7 +3784,7 @@ class OCR:
3777
3784
  kernel = np.ones((2, 2), np.uint8)
3778
3785
  cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
3779
3786
  lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
3780
- if not lang or lang == 'en': lang='eng'
3787
+ if not lang or IsIn(lang,['en','eng','english']): lang='eng'
3781
3788
  try:
3782
3789
  text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
3783
3790
  except:
@@ -3789,6 +3796,9 @@ class OCR:
3789
3796
  else:
3790
3797
  opts={}
3791
3798
  opts['detail']=detail
3799
+ opts['batch_size']=2
3800
+ opts['contrast_ths']=0.3
3801
+ opts['allowlist']='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:/-_@,"+'
3792
3802
  if isinstance(low_text,float): opts['low_text']=low_test
3793
3803
  if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
3794
3804
  if self.enhance:
@@ -3797,11 +3807,7 @@ class OCR:
3797
3807
  image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
3798
3808
  image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
3799
3809
  image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0) # Adjust threshold if needed
3800
- # image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
3801
3810
  image.save(image_file)
3802
- # image_np = numpy.array(image)
3803
- # return self.reader.readtext(image_np,**opts)
3804
- # else:
3805
3811
  text=self.reader.readtext(image_file,**opts)
3806
3812
  if output is str:
3807
3813
  return ' '.join(text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kmisc
3
- Version: 2.1.125
3
+ Version: 2.1.126
4
4
  Summary: Enginering useful library
5
5
  Home-page: https://github.com/kagepark/kmisc
6
6
  Author: Kage Park
@@ -0,0 +1,6 @@
1
+ kmisc/__init__.py,sha256=9Tr1tEZPHJ7CQXPJz6bjoEwW6IEOGIl1VBrBzgTjASo,164077
2
+ kmisc-2.1.126.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
+ kmisc-2.1.126.dist-info/METADATA,sha256=K7bGVkNJcPRqIiZeOxUUUskLANnK3p1J2agHVDBpGk8,5523
4
+ kmisc-2.1.126.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
+ kmisc-2.1.126.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
+ kmisc-2.1.126.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- kmisc/__init__.py,sha256=clNYHP2yuY6LqEG9lobGvdMyIA3EocDSsTJUfZTIs9U,163729
2
- kmisc-2.1.125.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
3
- kmisc-2.1.125.dist-info/METADATA,sha256=7na-3ZkC58XtrxZtn77ONgxR39jKufWqZLdEjo0dZxs,5523
4
- kmisc-2.1.125.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
5
- kmisc-2.1.125.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
6
- kmisc-2.1.125.dist-info/RECORD,,