PyPI - kmisc - Versions diffs - 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl - Mend

kmisc 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

kmisc/__init__.py CHANGED Viewed

@@ -3453,7 +3453,7 @@ def Upper(src,default='org'):
     if default in ['org',{'org'}]: return src
     return default
-def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
+def web_capture(url,output_file,image_size='full',wait_time=3,ignore_certificate_error=False,username=None,password=None,auth_fields={'auth':{'type':'name','name':('username','password')},'submit':{'type':'submit','name':None}},next_do={},gpu=False,live_capture=0,capture_method='file',capture_type='png',video_file=None,ocr_module='easyocr',find_string=None,found_space='\n',log=None,ocr_enhance=False,daemon=False,backup=False):
     #auth_fields.submit.type : name    : login button with name
     #                        : id      : login button with id
     #                        : submit  : submit button without name or id
@@ -3463,6 +3463,8 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
     #auth_fields.auth.name   : (usernane,password)    : username/password field string for name or id
     #next_do                 : put data and click submit
+    _url=url.split('/')
+    url=WEB().url_join(*_url[1:],method=_url[0])
     if isinstance(image_size,str):
         if 'x' in image_size:
             image_size=image_size.split('x')
@@ -3475,8 +3477,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
     if isinstance(image_size,(list,tuple)) and len(image_size) == 2:
         image_size=','.join([str(i) for i in image_size])
     else:
-        #Set it to default image size
-        image_size='1920,1080'
+        if not IsIn(image_size,[None,'full','fullscreen','full_screen','auto']):
+            #Set it to default image size
+            image_size='1920,1080'
     if Import('import selenium'):
         return False,'Can not install selenium package'
@@ -3484,10 +3487,13 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
         if backup:
             Import('filecmp')
             Import('shutil')
+        if capture_type in ['mov','mp4']:
+            print('????load cv2')
+            Import('cv2',install_name='opencv-python')
         ocr=None
         if capture_method != 'file':
-            ocr=OCR(enhance=ocr_enhance)
+            ocr=OCR(enhance=ocr_enhance,module=ocr_module)
         # Configure Chrome options for headless mode
         from selenium.webdriver.chrome.options import Options
         from selenium.webdriver.common.by import By
@@ -3497,7 +3503,9 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
         chrome_options.add_argument('--headless')  # Run in headless mode
         chrome_options.add_argument('--no-sandbox')
         chrome_options.add_argument('--disable-dev-shm-usage')
-        chrome_options.add_argument(f"--window-size={image_size}")  # Set window size
+        if image_size.lower() not in ['full','fullscreen','full_screen','auto']:
+            chrome_options.add_argument(f"--window-size={image_size}")  # Set window size
         if not gpu:
             chrome_options.add_argument("--disable-gpu")
         if ignore_certificate_error:
@@ -3505,6 +3513,14 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
             chrome_options.add_argument('--allow-insecure-localhost')  # gnore-certificate-errors
         # Initialize the Chrome driver
         driver = selenium.webdriver.Chrome(options=chrome_options)
+        if image_size.lower() in ['full','fullscreen','full_screen','auto']:
+            #original_size = driver.get_window_size()
+            #full_width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
+            #full_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
+            #driver.set_window_size(full_width, full_height)
+            # code save screenshot
+            #driver.set_window_size(original_size['width'], original_size['height']) #restore size
+            driver.maximize_window()
         rc=False,output_file
         try:
             # Navigate to the URL
@@ -3559,7 +3575,7 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
                     next_do_button = driver.find_element(By.XPATH, "//button[@type='submit']")
                 next_do_button.click()
-            def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon):
+            def _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file):
                 def wait_body(driver,timeout=10):
                     #wait until get screen data
                     try:
@@ -3578,23 +3594,37 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
                     time.sleep(timeout)
                 live_capture=Int(live_capture)
-                wait_time=Int(wait_time,10)
+                if not isinstance(wait_time,(int,float)):
+                    wait_time=Int(wait_time,10)
                 backup_idx=0
                 if backup:
                     backup=Int(backup,2)
                 if isinstance(live_capture,int) and live_capture > wait_time*2:
+                    #Keep capture
                     Time=TIME()
+                    if capture_type in ['mov','mp4']:
+                        frame_rate=1/wait_time
+                        window_size = driver.get_window_size()
+                        width = window_size['width']
+                        height = window_size['height']
+                        fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4
+                        video_writer = cv2.VideoWriter(video_file, fourcc, frame_rate, (width, height))
                     while True:
                         if Time.Out(live_capture):
                             driver.quit()
-                            return False
+                            if capture_type in ['mov','mp4']:
+                                video_writer.release()
+                                return True
+                            else:
+                                #Do something, but not return until timeout. So return False
+                                return False
                         # Capture screenshot
                         if log:
-                            if log in ['screen','log','print',print]:
+                            if IsIn(log,['screen','log','print',print]):
                                 printf(Dot(),direct=True)
                             else:
                                 printf(Dot(),log=log,direct=True)
-                        if backup:
+                        if backup and capture_type not in ['mov','mp4']:
                             save_file='{}.{}'.format(output_file,backup_idx%backup)
                         else:
                             save_file=output_file
@@ -3602,43 +3632,85 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
                         wait_body(driver,timeout=wait_time)
                         #capture
                         driver.save_screenshot(save_file)
-                        if backup:
-                            if backup == 2:
-                                comp_a=f'{output_file}.0'
-                                comp_b=f'{output_file}.1'
-                                if os.path.isfile(comp_a) and os.path.isfile(comp_b):
-                                    if filecmp.cmp(comp_a,comp_b):
-                                        if log:
-                                            if log in ['screen','log','print',print]:
-                                                printf(Dot(),direct=True)
-                                            else:
-                                                printf(Dot(),log=log,direct=True)
-                                        time.sleep(wait_time)
-                                        backup_idx+=1
-                                        continue
-                            shutil.copy2(save_file,output_file)
-                        if IsIn(capture_method,['log','screen','text']):
-                            found_words=ocr.Text(image_file=save_file)
-                            found_strings=found_space.join(found_words)
-                            printf(found_strings,log=log,mode='d' if log else 's')
-                            if find_string:
-                                if find_string in found_strings:
-                                    driver.quit()
-                                    return True
+                        if capture_type in ['mov','mp4']:
+                            #Make a video file
+                            frame = cv2.imread(save_file)
+                            frame = cv2.resize(frame, (width, height))
+                            video_writer.write(frame)
+                        else:
+                            #do something with picture file
+                            if backup:
+                                if backup == 2:
+                                    comp_a=f'{output_file}.0'
+                                    comp_b=f'{output_file}.1'
+                                    if os.path.isfile(comp_a) and os.path.isfile(comp_b):
+                                        if filecmp.cmp(comp_a,comp_b):
+                                            if log:
+                                                if IsIn(log,['screen','log','print',print]):
+                                                    printf(Dot(),direct=True)
+                                                else:
+                                                    printf(Dot(),log=log,direct=True)
+                                            time.sleep(wait_time)
+                                            backup_idx+=1
+                                            continue
+                                shutil.copy2(save_file,output_file)
+                            if IsIn(capture_method,['log','screen','text']):
+                                found_words=ocr.Text(image_file=save_file)
+                                found_strings=found_space.join(found_words)
+                                if IsIn(log,['screen','log','print',print,None]):
+                                    printf(found_strings,mode='s')
+                                else:
+                                    printf(found_strings,log=log,mode='d')
+                                if find_string:
+                                    if not isinstance(find_string,list):
+                                        find_string=[find_string]
+                                    for ff in find_string:
+                                        if ff in found_strings:
+                                            #Find exit string, So True, So True, So True, So True
+                                            driver.quit()
+                                            return True
+                            backup_idx+=1
+                        #capture interval
                         time.sleep(wait_time)
-                        backup_idx+=1
                 else:
+                    #Single capture
                     #wait
-                    wait_body(driver,timeout=wait_time)
+                    #wait_body(driver,timeout=wait_time)
+                    time.sleep(wait_time)
                     #capture
                     driver.save_screenshot(output_file)
-                driver.quit()
+                    if IsIn(capture_method,['log','screen','text']):
+                        found_words=ocr.Text(image_file=output_file)
+                        found_strings=found_space.join(found_words)
+                        if IsIn(log,['screen','log','print',print,None]):
+                            printf(found_strings,mode='s')
+                        else:
+                            printf(found_strings,log=log,mode='d')
+                        if find_string:
+                            if find_string in found_strings:
+                                driver.quit()
+                                return True
+                    driver.quit()
+                    if IsIn(capture_method,['text']):
+                        return found_strings
+                    return True
+            ##
+            if IsIn(capture_type,['mov','mp4']):
+                if not video_file:
+                    video_file='{}.mp4'.format('.'.join(output_file.split('.')[:-1]))
+            #Background running
             if daemon:
-                t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon))
+                t=kThread(target=_capture_, args=(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file))
                 return t
             else:
-                _capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon)
-                return True,output_file
+                #Single process running
+                rc=_capture_(live_capture,driver,output_file,wait_time,capture_method,backup,ocr,log,find_string,daemon,video_file)
+                if IsIn(capture_type,['mov','mp4']):
+                    return rc,video_file
+                else:
+                    return rc,output_file
         except Exception as e:
             #print(f"Error capturing screenshot: {str(e)}")
@@ -3650,42 +3722,92 @@ def web_capture(url,output_file,image_size='1920,1080',wait_time=3,ignore_certif
 #        return rc
 class OCR:
-    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,**opts):
+    def __init__(self,image_file=None,enhance=False,language=['en'],gpu=False,model_storage_directory=None,ocr_module='easyocr',**opts):
         self.enhance=enhance
         self.image_file=image_file
-        Import('easyocr')
-        if self.enhance:
-            Import('PIL',install_name='Pillow')
-            Import('numpy')
-        self.reader = easyocr.Reader(language,gpu=gpu,model_storage_directory=model_storage_directory)
-        # Suppress Torch pin_memory warning
-        warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
+        self.ocr_module=ocr_module
+        self.language=language
+        self.gpu=gpu
+        self.model_storage_directory=model_storage_directory
+        if self.ocr_module == 'pytesseract':
+            Import('pytesseract')
+            Import('import numpy as np')
+            Import('cv2',install_name='opencv-python')
+        else:
+            Import('easyocr')
+            if self.enhance:
+                Import('PIL',install_name='Pillow')
+                Import('numpy')
+            self.reader = easyocr.Reader(self.language,gpu=gpu,model_storage_directory=model_storage_directory)
+            # Suppress Torch pin_memory warning
+            warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")
-        # Suppress EasyOCR CPU warning
-        #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
+            # Suppress EasyOCR CPU warning
+            #warnings.filterwarnings("ignore", message="WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.")
-        # Suppress NetworkX backend warning
-        warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
+            # Suppress NetworkX backend warning
+            warnings.filterwarnings("ignore", category=RuntimeWarning, module="networkx.utils.backends")
-    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None):
+    def Text(self,detail=0,low_text=None,contrast_ths=None,image_file=None,output=str):
         if not image_file: image_file=self.image_file
         if not image_file: return False
-        opts={}
-        opts['detail']=detail
-        if isinstance(low_text,float): opts['low_text']=low_test
-        if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
-        if self.enhance:
-            image = PIL.Image.open(image_file)
-            image = image.convert('L') #Grayscale
-            image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
-            image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
-            image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0)  # Adjust threshold if needed
-#            image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
-            image.save(image_file)
-#            image_np = numpy.array(image)
-#            return self.reader.readtext(image_np,**opts)
-#        else:
-        return self.reader.readtext(image_file,**opts)
+        if not os.path.isfile(image_file): return False
+        if self.ocr_module == 'pytesseract':
+            image = cv2.imread(image_file)
+            # Convert to grayscale
+            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+            # Invert the image to make text black on white (Tesseract prefers this)
+            inverted = cv2.bitwise_not(gray)
+            # Light noise reduction with small Gaussian blur (fast)
+            blurred = cv2.GaussianBlur(inverted, (3, 3), 0)
+            # Optional CLAHE for contrast (comment out if too slow; it's generally fast)
+            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+            enhanced = clahe.apply(blurred)
+            # Small upscale (1.5x) for better DPI without heavy computation
+            scale_factor = 1.5
+            resized = cv2.resize(enhanced, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)  # LINEAR is faster than CUBIC
+            # Adaptive thresholding for varying console text quality
+            thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
+            # Minimal morphological cleanup (small kernel for speed)
+            kernel = np.ones((2, 2), np.uint8)
+            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
+            lang=self.language[0] if isinstance(self.language,(list,tuple)) else self.language
+            if not lang or lang == 'en': lang='eng'
+            try:
+                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang=lang).strip()
+            except:
+                text=pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6', lang='eng').strip()
+            if output is str:
+                return text
+            else:
+                return text.split()
+        else:
+            opts={}
+            opts['detail']=detail
+            if isinstance(low_text,float): opts['low_text']=low_test
+            if isinstance(contrast_ths,float): opts['contrast_ths']=contrast_ths
+            if self.enhance:
+                image = PIL.Image.open(image_file)
+                image = image.convert('L') #Grayscale
+                image = PIL.ImageEnhance.Contrast(image).enhance(3.0) #high contrast
+                image = PIL.ImageEnhance.Sharpness(image).enhance(2.0)#Sharpen
+                image = image.convert('RGB').point(lambda p: 255 if p > 140 else 0)  # Adjust threshold if needed
+#                image = image.resize((800, int(800 * image.height / image.width)), PIL.Image.Resampling.LANCZOS)
+                image.save(image_file)
+#                image_np = numpy.array(image)
+#                return self.reader.readtext(image_np,**opts)
+#            else:
+            text=self.reader.readtext(image_file,**opts)
+            if output is str:
+                return ' '.join(text)
+            else:
+                return text
 ############################################
 #Temporary function map for replacement

{kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: kmisc
-Version: 2.1.122
+Version: 2.1.124
 Summary: Enginering useful library
 Home-page: https://github.com/kagepark/kmisc
 Author: Kage Park

kmisc-2.1.124.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+kmisc/__init__.py,sha256=9KNDkVxjqk5_f1h8TRJMwItMXSZFyXBAzwK4MKR-6aM,163758
+kmisc-2.1.124.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
+kmisc-2.1.124.dist-info/METADATA,sha256=0Jeai7LsZye2IPSLxdVv8EfoU37IpRSJYJb5D4J1io8,5523
+kmisc-2.1.124.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+kmisc-2.1.124.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
+kmisc-2.1.124.dist-info/RECORD,,

kmisc-2.1.122.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-kmisc/__init__.py,sha256=nX41gPkw15kBnuav_Xmh11ZBdVZKRJq7OT13qs2Vjn0,156878
-kmisc-2.1.122.dist-info/LICENSE,sha256=mn9ekhb34HJxsrVhcxrLXJUzy55T62zg-Gh9Ro0mVJI,1066
-kmisc-2.1.122.dist-info/METADATA,sha256=-e9Y6YbWJmJ8DT1bcCvIHlwClRtEgMUVHboS5EjxYXw,5523
-kmisc-2.1.122.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-kmisc-2.1.122.dist-info/top_level.txt,sha256=wvdHf5aQTqcGYvxk-F9E_BMWLMhlwC8INBmwO-V6_X4,6
-kmisc-2.1.122.dist-info/RECORD,,

{kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/LICENSE RENAMED Viewed

File without changes

{kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/WHEEL RENAMED Viewed

File without changes

{kmisc-2.1.122.dist-info → kmisc-2.1.124.dist-info}/top_level.txt RENAMED Viewed

File without changes

kmisc 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl

kmisc 2.1.122__py3-none-any.whl → 2.1.124__py3-none-any.whl