PyPI - kmisc - Versions diffs - 2.1.123__py3-none-any.whl → 2.1.125__py3-none-any.whl - Mend

kmisc 2.1.123__py3-none-any.whl → 2.1.125__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (7) hide show

kmisc/__init__.py CHANGED Viewed

@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
     if default in ['org',{'org'}]: return src
     return default
-def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
+def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
     #auth_fields.submit.type : name    : login button with name
     #                        : id      : login button with id
     #                        : submit  : submit button without name or id
@@ -3492,7 +3492,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
         ocr=None
         if capture_method != 'file':
-            ocr=OCR(enhance=ocr_enhance)
+            ocr=OCR(enhance=ocr_enhance,module=ocr_module)
         # Configure Chrome options for headless mode
         from selenium.webdriver.chrome.options import Options
         from selenium.webdriver.common.by import By
@@ -3513,6 +3513,12 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
         # Initialize the Chrome driver
         driver = selenium.webdriver.Chrome(options=chrome_options)
         if image_size.lower() in ['full','fullscreen','full_screen','auto']:
+            #original_size = driver.get_window_size()
+            #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
+            #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
+            #driver.set_window_size(full_width, full_height)
+            # code save screenshot
+            #driver.set_window_size(original_size['width'], original_size['height']) #restore size
             driver.maximize_window()
         rc=False,output_file
         try:
@@ -3655,10 +3661,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
                                 else:
                                     printf(found_strings,log=log,mode='d')
                                 if find_string:
-                                    if find_string in found_strings:
-                                        #Find exit string, So True, So True, So True, So True
-                                        driver.quit()
-                                        return True
+                                    if not isinstance(find_string,list):
+                                        find_string=[find_string]
+                                    for ff in find_string:
+                                        if ff in found_strings:
+                                            #Find exit string, So True, So True, So True, So True
+                                            driver.quit()
+                                            return True
                             backup_idx+=1
                         #capture interval
                         time.sleep(wait_time)
@@ -3693,7 +3702,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
             #Background running
             if daemon:
                 t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
-                return t
+                return True,t
             else:
                 #Single process running
                 rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
@@ -3712,42 +3721,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
 #        return rc
 class OCR:
-    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
+    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
         self.enhance=enhance
         self.image_file=image_file
-        Import('easyocr')
-        if self.enhance:
-            Import('PIL',install_name='Pillow')
-            Import('numpy')
-        self.reader = easyocr.Reader(language,gpu=gpu,model_storage_directory=model_storage_directory)
-        # Suppress Torch pin_memory warning
-        warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
+        self.ocr_module=ocr_module
+        self.language=language
+        self.gpu=gpu
+        self.model_storage_directory=model_storage_directory
+        if self.ocr_module == 'pytesseract':
+            Import('pytesseract')
+            Import('import numpy as np')
+            Import('cv2',install_name='opencv-python')
+        else:
+            Import('easyocr')
+            if self.enhance:
+                Import('PIL',install_name='Pillow')
+                Import('numpy')
+            self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
+            # Suppress Torch pin_memory warning
+            warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
-        # Suppress EasyOCR CPU warning
-        #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
+            # Suppress EasyOCR CPU warning
+            #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
-        # Suppress NetworkX backend warning
-        warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
+            # Suppress NetworkX backend warning
+            warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
-    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
+    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
         if not image_file: image_file=self.image_file
         if not image_file: return False
-        opts={}
-        opts['detail']=detail
-        if isinstance(low_text,float): opts['low_text']=low_test
-        if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
-        if self.enhance:
-            image = PIL.Image.open(image_file)
-            image = image.convert('L') #Grayscale
-            image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
-            image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
-            image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0)  # Adjust threshold if needed
-#            image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
-            image.save(image_file)
-#            image_np = numpy.array(image)
-#            return self.reader.readtext(image_np,**opts)
-#        else:
-        return self.reader.readtext(image_file,**opts)
+        if not os.path.isfile(image_file): return False
+        if self.ocr_module == 'pytesseract':
+            image = cv2.imread(image_file)
+            # Convert to grayscale
+            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+            # Invert the image to make text black on white (Tesseract prefers this)
+            inverted = cv2.bitwise_not(gray)
+            # Light noise reduction with small Gaussian blur (fast)
+            blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
+            # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
+            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+            enhanced = clahe.apply(blurred)
+            # Small upscale (1.5x) for better DPI without heavy computation
+            scale_factor = 1.5
+            resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)  # LINEAR is faster than CUBIC
+            # Adaptive thresholding for varying console text quality
+            thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
+            # Minimal morphological cleanup (small kernel for speed)
+            kernel = np.ones((2, 2), np.uint8)
+            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
+            lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
+            if not lang or lang == 'en': lang='eng'
+            try:
+                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
+            except:
+                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
+            if output is str:
+                return text
+            else:
+                return text.split()
+        else:
+            opts={}
+            opts['detail']=detail
+            if isinstance(low_text,float): opts['low_text']=low_test
+            if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
+            if self.enhance:
+                image = PIL.Image.open(image_file)
+                image = image.convert('L') #Grayscale
+                image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
+                image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
+                image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0)  # Adjust threshold if needed
+#                image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
+                image.save(image_file)
+#                image_np = numpy.array(image)
+#                return self.reader.readtext(image_np,**opts)
+#            else:
+            text=self.reader.readtext(image_file,**opts)
+            if output is str:
+                return ' '.join(text)
+            else:
+                return text
 ############################################
 #Temporary function map for replacement

{kmisc-2.1.123.dist-info → kmisc-2.1.125.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: kmisc
-Version: 2.1.123
+Version: 2.1.125
 Summary: Enginering useful library
 Home-page: https://github.com/kagepark/kmisc
 Author: Kage Park

kmisc-2.1.125.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+kmisc/__init__.py,sha256=clNYHP2yuY6LqEG9lobGvdMyIA3EocDSsTJUfZTIs9U,163729
+kmisc-2.1.125.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
+kmisc-2.1.125.dist-info/METADATA,sha256=7na-3ZkC58XtrxZtn77ONgxR39jKufWqZLdEjo0dZxs,5523
+kmisc-2.1.125.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+kmisc-2.1.125.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
+kmisc-2.1.125.dist-info/RECORD,,

kmisc-2.1.123.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-kmisc/__init__.py,sha256=pu-2QG3sY7W_Vb6eJmpo7Mytvjs4DfaJseGNJN1GmgA,160300
-kmisc-2.1.123.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
-kmisc-2.1.123.dist-info/METADATA,sha256=S9rNpdC9R3EUiB9m6p-DZyZcq16Sle34LwxrSzBaYoc,5523
-kmisc-2.1.123.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-kmisc-2.1.123.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
-kmisc-2.1.123.dist-info/RECORD,,

{kmisc-2.1.123.dist-info → kmisc-2.1.125.dist-info}/LICENSE RENAMED Viewed

File without changes

{kmisc-2.1.123.dist-info → kmisc-2.1.125.dist-info}/WHEEL RENAMED Viewed

File without changes

{kmisc-2.1.123.dist-info → kmisc-2.1.125.dist-info}/top_level.txt RENAMED Viewed

File without changes

kmisc 2.1.123__py3-none-any.whl → 2.1.125__py3-none-any.whl

kmisc 2.1.123__py3-none-any.whl → 2.1.125__py3-none-any.whl