py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +1059 -114
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +204 -20
- py2ls/ocr.py +60 -4
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/METADATA +6 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/RECORD +16 -14
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/WHEEL +0 -0
py2ls/netfinder.py
CHANGED
@@ -626,7 +626,7 @@ def filter_links(links, contains="html", driver="requ", booster=False):
             )
         if condition:
             filtered_links.append(link)
-    return filtered_links
+    return ips.unique(filtered_links)
 
 
 def find_domain(links):
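filter_links() now dedupes its result through ips.unique() instead of returning the raw list, so an href repeated across a page is reported once. A minimal sketch of an order-preserving unique helper, assuming ips.unique behaves this way (the real helper lives in py2ls/ips.py and may differ):

def unique(lst):
    # Keep the first occurrence of each item, preserving order;
    # set(lst) alone would scramble the link ordering.
    seen = set()
    out = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            out.append(item)
    return out

# unique(["a.pdf", "b.pdf", "a.pdf"]) -> ["a.pdf", "b.pdf"]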
@@ -717,7 +717,7 @@ def downloader(
     kind=[".pdf"],
     contains=None,
     rm_folder=False,
-    booster=False,
+    booster=True,  # use find_links
     verbose=True,
     timeout=30,
     n_try=3,
@@ -726,7 +726,7 @@ def downloader(
 
     from requests.exceptions import ChunkedEncodingError, ConnectionError
 
-    if verbose:
+    if verbose and ips.run_once_within():
         print(
             "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
         )
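The usage banner is now gated by ips.run_once_within(), so it prints at most once within a time window instead of on every downloader() call. A plausible sketch of such a guard, assuming a module-level timestamp cache (the actual implementation in py2ls/ips.py may differ in signature and granularity):

import time

_last_fired = {}

def run_once_within(seconds=60, key="default"):
    # True only if this key has not fired within the last `seconds`.
    now = time.time()
    if now - _last_fired.get(key, 0.0) >= seconds:
        _last_fired[key] = now
        return True
    return False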
@@ -734,8 +734,11 @@ def downloader(
     def fname_corrector(fname, ext):
         if not ext.startswith("."):
             ext = "." + ext
-        if not fname.endswith(ext):
+        if not fname.endswith(ext):  # if not ext in fname:
             fname = fname[: -len(ext)] + ext
+        if not any(fname[: -len(ext)]):
+            from datetime import datetime
+            fname = datetime.now().strftime("%H%M%S") + ext
         return fname
 
     def check_and_modify_filename(directory, filename):
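The new any(fname[: -len(ext)]) check catches filenames whose stem is empty after the extension swap (e.g. a bare ".pdf"), substituting an HHMMSS timestamp so the download is not saved under an empty or hidden name. Hypothetical traces, not taken from the source:

fname_corrector("paper.htm", ".pdf")  # -> "paper.pdf" (extension swapped, stem kept)
fname_corrector(".pdf", ".pdf")       # empty stem -> e.g. "143205.pdf"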
@@ -784,8 +787,8 @@ def downloader(
             kind[i] = "." + kind[i]
     file_links_all = []
     for kind_ in kind:
-        if isinstance(contains, str):
-            contains = [contains]
+        # if isinstance(contains, str):
+        #     contains = [contains]
         if isinstance(url, str):
             if any(ext in url for ext in kind):
                 file_links = [url]
@@ -799,7 +802,7 @@ def downloader(
             if contains is not None:
                 file_links = filter_links(links_all, contains=contains + kind_)
             else:
-                file_links = links_all
+                file_links = filter_links(links_all, contains=kind_)  # links_all #
         elif isinstance(url, list):
             links_all = url
             if contains is not None:
@@ -812,6 +815,7 @@ def downloader(
                 file_links = filter_links(links_all, contains=contains + kind_)
             else:
                 file_links = filter_links(links_all, contains=kind_)
+        file_links = ips.unique(file_links)
         if verbose:
             if file_links:
                 from pprint import pp
@@ -825,6 +829,7 @@ def downloader(
             file_links_all = [file_links]
         elif isinstance(file_links, list):
             file_links_all.extend(file_links)
+    file_links_all = ips.unique(file_links_all)
     if dir_save:
         if rm_folder:
             ips.rm_folder(dir_save)
@@ -847,7 +852,7 @@ def downloader(
                 )
             if ext is None:
                 ext = kind_
-
+
             if ext:
                 corrected_fname = fname_corrector(fnames[idx], ext)
                 corrected_fname = check_and_modify_filename(
@@ -860,13 +865,13 @@ def downloader(
                         datetime.now().strftime("%y%m%d_%H%M%S_")
                         + corrected_fname
                     )
-                fpath_tmp = os.path.join(dir_save, corrected_fname)
+                    fpath_tmp = os.path.join(dir_save, corrected_fname)
                 with open(fpath_tmp, "wb") as file:
                     for chunk in response.iter_content(chunk_size=8192):
                         if chunk:  # Filter out keep-alive chunks
                             file.write(chunk)
                 if verbose:
-                    print(f"Done
+                    print(f"Done⤵{fnames[idx]}")
             else:
                 if verbose:
                     print(f"Unknown file type for {file_link}")
@@ -886,16 +891,7 @@ def downloader(
 
             if itry == n_try:
                 print(f"Failed to download {file_link} after {n_try} attempts.")
-
-    # print(f"\n{len(fnames)} files were downloaded:")
-    if verbose:
-        from pprint import pp
-
-        if corrected_fname:
-            pp(corrected_fname)
-            print(f"\n\nsaved @:\n{dir_save}")
-        else:
-            pp(fnames)
+
 
 
 def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=True):
@@ -1612,3 +1608,191 @@ def ai(*args, **kwargs):
     if len(args) == 1 and isinstance(args[0], str):
         kwargs["query"] = args[0]
     return echo(**kwargs)
+
+
+#! get_ip()
+def get_ip(ip=None):
+    """
+    Usage:
+        from py2ls import netfinder as nt
+        ip = nt.get_ip()
+    """
+
+    import requests
+    import time
+    import logging
+    from datetime import datetime, timedelta
+
+    # Set up logging configuration
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        handlers=[
+            logging.StreamHandler(),
+            logging.FileHandler("public_ip_log.log"),  # Log to a file
+        ],
+    )
+
+    cache = {}
+
+    # Function to fetch IP addresses synchronously
+    def fetch_ip(url, retries, timeout, headers):
+        """
+        Synchronous function to fetch the IP address with retries.
+        """
+        for attempt in range(retries):
+            try:
+                response = requests.get(url, timeout=timeout, headers=headers)
+                response.raise_for_status()
+                return response.json()
+            except requests.RequestException as e:
+                logging.error(f"Attempt {attempt + 1} failed: {e}")
+                if attempt < retries - 1:
+                    time.sleep(2**attempt)  # Exponential backoff
+                else:
+                    logging.error("Max retries reached.")
+                    return {"error": f"Error fetching IP: {e}"}
+            except requests.Timeout:
+                logging.error("Request timed out")
+                time.sleep(2**attempt)
+        return {"error": "Failed to fetch IP after retries"}
+
+    # Function to fetch geolocation synchronously
+    def fetch_geolocation(url, retries, timeout, headers):
+        """
+        Synchronous function to fetch geolocation data by IP address.
+        """
+        for attempt in range(retries):
+            try:
+                response = requests.get(url, timeout=timeout, headers=headers)
+                response.raise_for_status()
+                return response.json()
+            except requests.RequestException as e:
+                logging.error(f"Geolocation request attempt {attempt + 1} failed: {e}")
+                if attempt < retries - 1:
+                    time.sleep(2**attempt)  # Exponential backoff
+                else:
+                    logging.error("Max retries reached.")
+                    return {"error": f"Error fetching geolocation: {e}"}
+            except requests.Timeout:
+                logging.error("Geolocation request timed out")
+                time.sleep(2**attempt)
+        return {"error": "Failed to fetch geolocation after retries"}
+
+    # Main function to get public IP and geolocation
+    def get_public_ip(
+        ip4=True,
+        ip6=True,
+        verbose=True,
+        retries=3,
+        timeout=5,
+        geolocation=True,
+        headers=None,
+        cache_duration=5,
+    ):
+        """
+        Synchronously fetches public IPv4 and IPv6 addresses, along with optional geolocation info.
+        """
+        # Use the cache if it's still valid
+        cache_key_ip4 = "public_ip4"
+        cache_key_ip6 = "public_ip6"
+        cache_key_geolocation = "geolocation"
+
+        if (
+            cache
+            and cache_key_ip4 in cache
+            and datetime.now() < cache[cache_key_ip4]["expires"]
+        ):
+            logging.info("Cache hit for IPv4, using cached data.")
+            ip4_data = cache[cache_key_ip4]["data"]
+        else:
+            ip4_data = None
+
+        if (
+            cache
+            and cache_key_ip6 in cache
+            and datetime.now() < cache[cache_key_ip6]["expires"]
+        ):
+            logging.info("Cache hit for IPv6, using cached data.")
+            ip6_data = cache[cache_key_ip6]["data"]
+        else:
+            ip6_data = None
+
+        if (
+            cache
+            and cache_key_geolocation in cache
+            and datetime.now() < cache[cache_key_geolocation]["expires"]
+        ):
+            logging.info("Cache hit for Geolocation, using cached data.")
+            geolocation_data = cache[cache_key_geolocation]["data"]
+        else:
+            geolocation_data = None
+
+        # Fetch IPv4 if requested
+        if ip4 and not ip4_data:
+            logging.info("Fetching IPv4...")
+            ip4_data = fetch_ip(
+                "https://api.ipify.org?format=json", retries, timeout, headers
+            )
+            cache[cache_key_ip4] = {
+                "data": ip4_data,
+                "expires": datetime.now() + timedelta(minutes=cache_duration),
+            }
+
+        # Fetch IPv6 if requested
+        if ip6 and not ip6_data:
+            logging.info("Fetching IPv6...")
+            ip6_data = fetch_ip(
+                "https://api6.ipify.org?format=json", retries, timeout, headers
+            )
+            cache[cache_key_ip6] = {
+                "data": ip6_data,
+                "expires": datetime.now() + timedelta(minutes=cache_duration),
+            }
+
+        # Fetch geolocation if requested
+        if geolocation and not geolocation_data:
+            logging.info("Fetching Geolocation...")
+            geolocation_data = fetch_geolocation(
+                "https://ipinfo.io/json", retries, timeout, headers
+            )
+            cache[cache_key_geolocation] = {
+                "data": geolocation_data,
+                "expires": datetime.now() + timedelta(minutes=cache_duration),
+            }
+
+        # Prepare the results
+        ip_info = {
+            "ip4": ip4_data.get("ip") if ip4_data else "N/A",
+            "ip6": ip6_data.get("ip") if ip6_data else "N/A",
+            "geolocation": geolocation_data if geolocation_data else "N/A",
+        }
+
+        # Verbose output if requested
+        if verbose:
+            print(f"Public IPv4: {ip_info['ip4']}")
+            print(f"Public IPv6: {ip_info['ip6']}")
+            print(f"Geolocation: {ip_info['geolocation']}")
+
+        return ip_info
+
+    # Function to get geolocation data by IP
+    def get_geolocation_by_ip(ip, retries=3, timeout=5, headers=None):
+        """
+        Fetches geolocation data for a given IP address.
+        """
+        url = f"https://ipinfo.io/{ip}/json"
+        geolocation_data = fetch_geolocation(url, retries, timeout, headers)
+        return geolocation_data
+
+    #! here starting get_ip()
+    headers = {"User-Agent": user_agent()}
+    if ip is None:
+        try:
+            ip_data = get_public_ip(headers=headers, verbose=True)
+        except Exception as e:
+            print(e)
+            ip_data = None
+        return ip_data
+    else:
+        geolocation_data = get_geolocation_by_ip(ip, headers=headers)
+        return geolocation_data
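For reference, a minimal usage sketch of the newly added helper; the return shape is inferred from the code above, and the geolocation payload is whatever ipinfo.io returns:

from py2ls import netfinder as nt

info = nt.get_ip()           # own public IPs: {"ip4": ..., "ip6": ..., "geolocation": {...}}
geo = nt.get_ip("8.8.8.8")   # geolocation dict for a specific IP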
py2ls/ocr.py
CHANGED
@@ -486,6 +486,18 @@ def preprocess_img(
 
     return img_preprocessed
 
+def convert_image_to_bytes(image):
+    """
+    Convert a CV2 or numpy image to bytes for ddddocr.
+    """
+    import io
+    # Convert OpenCV image (numpy array) to PIL image
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+    # Save PIL image to a byte stream
+    img_byte_arr = io.BytesIO()
+    image.save(img_byte_arr, format='PNG')
+    return img_byte_arr.getvalue()
 
 def text_postprocess(
     text,
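convert_image_to_bytes() bridges OpenCV's BGR numpy arrays and ddddocr's bytes-only API by round-tripping through an in-memory PNG. A quick illustrative call (the file name is hypothetical):

import cv2

img = cv2.imread("captcha.png")           # numpy array, BGR channel order
png_bytes = convert_image_to_bytes(img)   # PNG-encoded bytes ready for ddddocr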
@@ -604,10 +616,11 @@ def get_text(
     """
     )
 
-    models = ["easyocr", "paddleocr", "pytesseract"]
+    models = ["easyocr", "paddleocr", "pytesseract", "ddddocr"]
     model = strcmp(model, models)[0]
     lang = lang_auto_detect(lang, model)
     if isinstance(image, str):
+        dir_img = image
         image = cv2.imread(image)
 
     # Ensure lang is always a list
@@ -705,9 +718,10 @@ def get_text(
         )  # PaddleOCR supports only one language at a time
         result = ocr.ocr(image_process, **kwargs)
         detections = []
-
-
-
+        if result[0] is not None:
+            for line in result[0]:
+                bbox, (text, score) = line
+                detections.append((bbox, text, score))
         if postprocess is None:
             postprocess = dict(
                 spell_check=True,
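The new result[0] is not None guard matters because PaddleOCR returns [None] for an image with no detectable text, which an unguarded loop would crash on; each detected line otherwise unpacks into a box plus a (text, confidence) pair. Roughly, as implied by the unpacking above:

# result == [[(bbox, ("hello", 0.98)),
#             (bbox, ("world", 0.95))]]   # one inner list per page
# result == [None]                        # nothing detected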
@@ -787,7 +801,49 @@ def get_text(
         else:
             # return all detection info by default
             return detections
+    elif "ddddocr" in model.lower():
+        import ddddocr
+
+        ocr = ddddocr.DdddOcr(det=False, ocr=True)
+        image_bytes = convert_image_to_bytes(image_process)
+
+        results = ocr.classification(image_bytes)  # Text extraction
+
+        # Optional: Perform detection for bounding boxes
+        detections = []
+        if kwargs.get("det", False):
+            det_ocr = ddddocr.DdddOcr(det=True)
+            det_results = det_ocr.detect(image_bytes)
+            for box in det_results:
+                top_left = (box[0], box[1])
+                bottom_right = (box[2], box[3])
+                detections.append((top_left, bottom_right))
 
+        if postprocess is None:
+            postprocess = dict(
+                spell_check=True,
+                clean=True,
+                filter=dict(min_length=2),
+                pattern=None,
+                merge=True,
+            )
+        text_corr = []
+        [
+            text_corr.extend(text_postprocess(text, **postprocess))
+            for _, text, _ in detections
+        ]
+        # Visualization
+        if show:
+            if ax is None:
+                ax = plt.gca()
+            image_vis = image.copy()
+            if detections:
+                for top_left, bottom_right in detections:
+                    cv2.rectangle(image_vis, top_left, bottom_right, box_color, 2)
+            image_vis = cv2.cvtColor(image_vis, cmap)
+            ax.imshow(image_vis)
+            ax.axis("off")
+        return detections
     else:  # "pytesseract"
         if ax is None:
             ax = plt.gca()
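For context, the ddddocr API used in this new branch operates on raw image bytes; a minimal standalone sketch (the file name is hypothetical):

import ddddocr

ocr = ddddocr.DdddOcr(det=False, ocr=True)   # recognition-only model
with open("captcha.png", "rb") as f:
    text = ocr.classification(f.read())      # returns the recognized string
print(text)

DdddOcr(det=True) loads a separate detection model, which the branch above uses for the optional bounding boxes.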