oafuncs 0.0.97.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/__init__.py +54 -0
 - oafuncs/_script/__init__.py +27 -0
 - oafuncs/_script/plot_dataset.py +299 -0
 - oafuncs/data_store/OAFuncs.png +0 -0
 - oafuncs/data_store/hycom_3hourly.png +0 -0
 - oafuncs/oa_cmap.py +215 -0
 - oafuncs/oa_data.py +293 -0
 - oafuncs/oa_down/User_Agent-list.txt +6697 -0
 - oafuncs/oa_down/__init__.py +22 -0
 - oafuncs/oa_down/hycom_3hourly.py +1309 -0
 - oafuncs/oa_down/hycom_3hourly_20250129.py +1307 -0
 - oafuncs/oa_down/idm.py +50 -0
 - oafuncs/oa_down/literature.py +288 -0
 - oafuncs/oa_down/test_ua.py +151 -0
 - oafuncs/oa_down/user_agent.py +31 -0
 - oafuncs/oa_draw.py +326 -0
 - oafuncs/oa_file.py +413 -0
 - oafuncs/oa_help.py +144 -0
 - oafuncs/oa_model/__init__.py +19 -0
 - oafuncs/oa_model/roms/__init__.py +20 -0
 - oafuncs/oa_model/roms/test.py +19 -0
 - oafuncs/oa_model/wrf/__init__.py +18 -0
 - oafuncs/oa_model/wrf/little_r.py +186 -0
 - oafuncs/oa_nc.py +523 -0
 - oafuncs/oa_python.py +108 -0
 - oafuncs/oa_sign/__init__.py +21 -0
 - oafuncs/oa_sign/meteorological.py +168 -0
 - oafuncs/oa_sign/ocean.py +158 -0
 - oafuncs/oa_sign/scientific.py +139 -0
 - oafuncs/oa_tool/__init__.py +19 -0
 - oafuncs/oa_tool/email.py +114 -0
 - oafuncs/oa_tool/parallel.py +90 -0
 - oafuncs/oa_tool/time.py +22 -0
 - oafuncs-0.0.97.1.dist-info/LICENSE.txt +19 -0
 - oafuncs-0.0.97.1.dist-info/METADATA +106 -0
 - oafuncs-0.0.97.1.dist-info/RECORD +38 -0
 - oafuncs-0.0.97.1.dist-info/WHEEL +5 -0
 - oafuncs-0.0.97.1.dist-info/top_level.txt +1 -0
 
    
        oafuncs/oa_down/idm.py
    ADDED
    
    | 
         @@ -0,0 +1,50 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env python
         
     | 
| 
      
 2 
     | 
    
         
            +
            # coding=utf-8
         
     | 
| 
      
 3 
     | 
    
         
            +
            """
         
     | 
| 
      
 4 
     | 
    
         
            +
            Author: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 5 
     | 
    
         
            +
            Date: 2025-01-11 16:19:12
         
     | 
| 
      
 6 
     | 
    
         
            +
            LastEditors: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 7 
     | 
    
         
            +
            LastEditTime: 2025-01-11 16:25:47
         
     | 
| 
      
 8 
     | 
    
         
            +
            FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\idm.py
         
     | 
| 
      
 9 
     | 
    
         
            +
            Description:
         
     | 
| 
      
 10 
     | 
    
         
            +
            EditPlatform: vscode
         
     | 
| 
      
 11 
     | 
    
         
            +
            ComputerInfo: XPS 15 9510
         
     | 
| 
      
 12 
     | 
    
         
            +
            SystemInfo: Windows 11
         
     | 
| 
      
 13 
     | 
    
         
            +
            Python Version: 3.12
         
     | 
| 
      
 14 
     | 
    
         
            +
            """
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            import datetime
         
     | 
| 
      
 17 
     | 
    
         
            +
            import os
         
     | 
| 
      
 18 
     | 
    
         
            +
            from subprocess import call
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            from rich import print
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            __all__ = ["downloader"]
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            def downloader(task_url, folder_path, file_name, idm_engine=r"D:\Programs\Internet Download Manager\IDMan.exe"):
                """
                Description:
                    Hand a download over to IDM: the task is added to the IDM queue
                    and the queue is then started.
                Parameter:
                    task_url: str
                        Link of the file to download.
                    folder_path: str
                        Folder the file is saved in; created when it does not exist yet.
                    file_name: str
                        Name the downloaded file is saved under.
                    idm_engine: str
                        Path of the IDM executable ("IDMan.exe").
                Return:
                    None
                Example:
                    downloader("https://www.test.com/data.nc", "E:\\Data", "test.nc", "D:\\Programs\\Internet Download Manager\\IDMan.exe")
                """
                # The target folder has to exist before IDM is asked to save into it.
                os.makedirs(folder_path, exist_ok=True)

                # First command adds the task to the queue, second one starts the queue.
                enqueue_cmd = [idm_engine, "/d", task_url, "/p", folder_path, "/f", file_name, "/a"]
                start_cmd = [idm_engine, "/s"]
                for cmd in (enqueue_cmd, start_cmd):
                    call(cmd)

                # Time stamp framed by two separator rules, followed by the confirmation line.
                stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                rule = "[purple]-" * 50
                print(rule + f"\n{stamp}\n" + rule)
                print(f"[green]IDM Downloader: {file_name} download task has been added to the queue...[/green]")
         
     | 
| 
         @@ -0,0 +1,288 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env python
         
     | 
| 
      
 2 
     | 
    
         
            +
            # coding=utf-8
         
     | 
| 
      
 3 
     | 
    
         
            +
            """
         
     | 
| 
      
 4 
     | 
    
         
            +
            Author: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 5 
     | 
    
         
            +
            Date: 2024-11-28 10:42:56
         
     | 
| 
      
 6 
     | 
    
         
            +
            LastEditors: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 7 
     | 
    
         
            +
            LastEditTime: 2025-01-05 10:51:42
         
     | 
| 
      
 8 
     | 
    
         
            +
            FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\literature.py
         
     | 
| 
      
 9 
     | 
    
         
            +
            Description:
         
     | 
| 
      
 10 
     | 
    
         
            +
            EditPlatform: vscode
         
     | 
| 
      
 11 
     | 
    
         
            +
            ComputerInfo: XPS 15 9510
         
     | 
| 
      
 12 
     | 
    
         
            +
            SystemInfo: Windows 11
         
     | 
| 
      
 13 
     | 
    
         
            +
            Python Version: 3.12
         
     | 
| 
      
 14 
     | 
    
         
            +
            """
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            import os
         
     | 
| 
      
 17 
     | 
    
         
            +
            import re
         
     | 
| 
      
 18 
     | 
    
         
            +
            import time
         
     | 
| 
      
 19 
     | 
    
         
            +
            from pathlib import Path
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            import pandas as pd
         
     | 
| 
      
 22 
     | 
    
         
            +
            import requests
         
     | 
| 
      
 23 
     | 
    
         
            +
            from rich import print
         
     | 
| 
      
 24 
     | 
    
         
            +
            from rich.progress import track
         
     | 
| 
      
 25 
     | 
    
         
            +
            from oafuncs.oa_down.user_agent import get_ua
         
     | 
| 
      
 26 
     | 
    
         
            +
            from oafuncs.oa_file import remove
         
     | 
| 
      
 27 
     | 
    
         
            +
            from oafuncs.oa_data import ensure_list
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            __all__ = ["download5doi"]
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            def _get_file_size(file_path, unit="KB"):
         
     | 
| 
      
 33 
     | 
    
         
            +
                # 检查文件是否存在
         
     | 
| 
      
 34 
     | 
    
         
            +
                if not os.path.exists(file_path):
         
     | 
| 
      
 35 
     | 
    
         
            +
                    return "文件不存在"
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                # 获取文件大小(字节)
         
     | 
| 
      
 38 
     | 
    
         
            +
                file_size = os.path.getsize(file_path)
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                # 单位转换字典
         
     | 
| 
      
 41 
     | 
    
         
            +
                unit_dict = {
         
     | 
| 
      
 42 
     | 
    
         
            +
                    "PB": 1024**5,
         
     | 
| 
      
 43 
     | 
    
         
            +
                    "TB": 1024**4,
         
     | 
| 
      
 44 
     | 
    
         
            +
                    "GB": 1024**3,
         
     | 
| 
      
 45 
     | 
    
         
            +
                    "MB": 1024**2,
         
     | 
| 
      
 46 
     | 
    
         
            +
                    "KB": 1024,
         
     | 
| 
      
 47 
     | 
    
         
            +
                }
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                # 检查传入的单位是否合法
         
     | 
| 
      
 50 
     | 
    
         
            +
                if unit not in unit_dict:
         
     | 
| 
      
 51 
     | 
    
         
            +
                    return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                # 转换文件大小到指定单位
         
     | 
| 
      
 54 
     | 
    
         
            +
                converted_size = file_size / unit_dict[unit]
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                return converted_size
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            class _Downloader:
         
     | 
| 
      
 60 
     | 
    
         
            +
                """
         
     | 
| 
      
 61 
     | 
    
         
            +
                根据doi下载文献pdf
         
     | 
| 
      
 62 
     | 
    
         
            +
                """
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                def __init__(self, doi, store_path):
                    """
                    Prepare the download state for a single DOI.

                    Parameter:
                        doi: str
                            DOI of the paper; it is appended to each mirror address and
                            also used to derive the PDF file name.
                        store_path: str or path-like
                            Folder that receives the PDF and the "wrong_record.txt" log.
                    """
                    # Mirror sites, tried in this order.
                    mirrors = (
                        "https://sci-hub.se",
                        "https://sci-hub.ren",
                        "https://sci-hub.st",
                        "https://sci-hub.ru",  # the one that works best
                        # ------------------------------------- sites below are not verified
                        "https://sci-hub.wf",
                        "https://sci-hub.yt",
                        "https://sci-hub.ee",
                        "https://sci-hub.cat",
                        "https://sci-hub.in",
                        "https://www.pismin.com",
                        "https://sci-hub.vkif.top",
                        "https://www.bothonce.com",
                        "https://sci-hub.et-fine.com",
                        "https://sci-hub.hkvisa.net",
                        # "https://sci-hub.3800808.com",  # this one only allows saving by hand
                        "https://sci-hub.zidianzhan.net",
                        "https://sci-hub.usualwant.com",
                    )
                    self.url_list = list(mirrors)

                    # Request state, filled in while the mirrors are walked.
                    self.base_url = None
                    self.url = None
                    self.pdf_url = None
                    self.pdf_path = None
                    self.cookies = None
                    self.headers = {"User-Agent": get_ua().encode("utf-8")}

                    # File name: the characters / < > : " ? * | of the DOI become "_",
                    # e.g. for a DOI like 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2
                    self.doi = doi
                    self.fname = re.sub(r'[/<>:"?*|]', "_", doi) + ".pdf"
                    self.store_path = Path(store_path)
                    self.fpath = self.store_path / self.fname
                    self.wrong_record_file = self.store_path / "wrong_record.txt"

                    # Retry bookkeeping.
                    self.sleep = 5  # passed to time.sleep() between download attempts
                    self.check_size = 50  # downloaded files below this size in KB are discarded
                    self.url_index = 0  # position of the current mirror in url_list
                    self.try_times_each_url_max = 3
                    self.try_times = 0
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
                def get_pdf_url(self):
                    """
                    Request the mirror page `self.url` and extract the PDF download link.

                    On success `self.pdf_url` holds an absolute link and `self.cookies`
                    the cookies returned with the page. On every failure (request error,
                    status code other than 200, no download link in the page)
                    `self.pdf_url` is None and `self.try_times` is set above
                    `self.try_times_each_url_max`.

                    Return:
                        None
                    """
                    # Drop the link found on a previously tried mirror; otherwise a failing
                    # mirror would leave the stale link in place and it would be reused.
                    self.pdf_url = None

                    print("[bold #E6E6FA]-" * 120)
                    print(f"DOI: {self.doi}")
                    print(f"Requesting: {self.url}...")
                    try:
                        # Without a timeout an unresponsive mirror would block forever;
                        # a timeout error is handled by the except branch below.
                        response = requests.get(self.url, headers=self.headers, timeout=30)
                        if response.status_code == 200:
                            self.cookies = response.cookies
                            # Backslashes are removed before the page is searched.
                            text = response.text.replace("\\", "")
                            pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
                            match = pattern.search(text)
                            if match:
                                got_url = match.group(1)
                                if got_url.startswith(("http://", "https://")):
                                    # Already an absolute link.
                                    self.pdf_url = got_url
                                elif got_url.startswith("//"):
                                    # Protocol-relative link: only the scheme is missing.
                                    self.pdf_url = "https:" + got_url
                                else:
                                    # Site-relative link: prepend the mirror address.
                                    self.pdf_url = self.base_url + got_url
                                print(f"URL: {self.pdf_url}")
                            else:
                                print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
                                self.try_times = self.try_times_each_url_max + 1
                        else:
                            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
                            print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
                            self.try_times = self.try_times_each_url_max + 1
                    except Exception as e:
                        # Best effort: any error is reported and counts as a failed mirror.
                        print(f"Failed to retrieve the webpage. Error: {e}")
                        self.try_times = self.try_times_each_url_max + 1
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
                def url_iterate(self):
         
     | 
| 
      
 139 
     | 
    
         
            +
                    if self.url_index >= len(self.url_list):
         
     | 
| 
      
 140 
     | 
    
         
            +
                        return
         
     | 
| 
      
 141 
     | 
    
         
            +
                    url = self.url_list[self.url_index]
         
     | 
| 
      
 142 
     | 
    
         
            +
                    self.base_url = url
         
     | 
| 
      
 143 
     | 
    
         
            +
                    self.url = url + "/" + self.doi
         
     | 
| 
      
 144 
     | 
    
         
            +
                    self.get_pdf_url()
         
     | 
| 
      
 145 
     | 
    
         
            +
                    # for url in self.url_list:
         
     | 
| 
      
 146 
     | 
    
         
            +
                    #     self.url = url + self.doi
         
     | 
| 
      
 147 
     | 
    
         
            +
                    #     self.get_pdf_url()
         
     | 
| 
      
 148 
     | 
    
         
            +
                    #     if self.pdf_url:
         
     | 
| 
      
 149 
     | 
    
         
            +
                    #         break
         
     | 
| 
      
 150 
     | 
    
         
            +
             
     | 
| 
      
 151 
     | 
    
         
            +
                def write_wrong_record(self):
         
     | 
| 
      
 152 
     | 
    
         
            +
                    # 先读取txt中的内容,如果已经存在则不再写入
         
     | 
| 
      
 153 
     | 
    
         
            +
                    if self.wrong_record_file.exists():
         
     | 
| 
      
 154 
     | 
    
         
            +
                        with open(self.wrong_record_file, "r") as f:
         
     | 
| 
      
 155 
     | 
    
         
            +
                            lines = f.readlines()
         
     | 
| 
      
 156 
     | 
    
         
            +
                        if self.doi in lines:
         
     | 
| 
      
 157 
     | 
    
         
            +
                            return
         
     | 
| 
      
 158 
     | 
    
         
            +
                    with open(self.wrong_record_file, "a") as f:
         
     | 
| 
      
 159 
     | 
    
         
            +
                        f.write(self.doi + "\n")
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
                def download_pdf(self):
         
     | 
| 
      
 162 
     | 
    
         
            +
                    if self.fpath.exists():
         
     | 
| 
      
 163 
     | 
    
         
            +
                        fsize = _get_file_size(self.fpath, unit="KB")
         
     | 
| 
      
 164 
     | 
    
         
            +
                        if fsize < self.check_size:
         
     | 
| 
      
 165 
     | 
    
         
            +
                            # delete the wrong file
         
     | 
| 
      
 166 
     | 
    
         
            +
                            os.remove(self.fpath)
         
     | 
| 
      
 167 
     | 
    
         
            +
                            print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
         
     | 
| 
      
 168 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 169 
     | 
    
         
            +
                            print("[bold #E6E6FA]-" * 120)
         
     | 
| 
      
 170 
     | 
    
         
            +
                            print(f"[bold purple]The PDF file {self.fpath} already exists.")
         
     | 
| 
      
 171 
     | 
    
         
            +
                            return
         
     | 
| 
      
 172 
     | 
    
         
            +
                    self.url_index = 0
         
     | 
| 
      
 173 
     | 
    
         
            +
                    already_downloaded = False
         
     | 
| 
      
 174 
     | 
    
         
            +
                    self.try_times = 0
         
     | 
| 
      
 175 
     | 
    
         
            +
                    while not already_downloaded:
         
     | 
| 
      
 176 
     | 
    
         
            +
                        self.url_iterate()
         
     | 
| 
      
 177 
     | 
    
         
            +
                        if not self.pdf_url:
         
     | 
| 
      
 178 
     | 
    
         
            +
                            self.url_index += 1
         
     | 
| 
      
 179 
     | 
    
         
            +
                            if self.url_index >= len(self.url_list):
         
     | 
| 
      
 180 
     | 
    
         
            +
                                print("Failed to download the PDF file.")
         
     | 
| 
      
 181 
     | 
    
         
            +
                                self.write_wrong_record()
         
     | 
| 
      
 182 
     | 
    
         
            +
                                return
         
     | 
| 
      
 183 
     | 
    
         
            +
                            else:
         
     | 
| 
      
 184 
     | 
    
         
            +
                                self.try_times = 0
         
     | 
| 
      
 185 
     | 
    
         
            +
                                continue
         
     | 
| 
      
 186 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 187 
     | 
    
         
            +
                            self.try_times += 1
         
     | 
| 
      
 188 
     | 
    
         
            +
                        if self.try_times > self.try_times_each_url_max:
         
     | 
| 
      
 189 
     | 
    
         
            +
                            self.url_index += 1
         
     | 
| 
      
 190 
     | 
    
         
            +
                            if self.url_index >= len(self.url_list):
         
     | 
| 
      
 191 
     | 
    
         
            +
                                # print("Failed to download the PDF file.")
         
     | 
| 
      
 192 
     | 
    
         
            +
                                self.write_wrong_record()
         
     | 
| 
      
 193 
     | 
    
         
            +
                                return
         
     | 
| 
      
 194 
     | 
    
         
            +
                        print(f"Downloading: {self.fname}...")
         
     | 
| 
      
 195 
     | 
    
         
            +
                        try:
         
     | 
| 
      
 196 
     | 
    
         
            +
                            response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
         
     | 
| 
      
 197 
     | 
    
         
            +
                            if response.status_code == 200:
         
     | 
| 
      
 198 
     | 
    
         
            +
                                with open(self.fpath, "wb") as f:
         
     | 
| 
      
 199 
     | 
    
         
            +
                                    f.write(response.content)
         
     | 
| 
      
 200 
     | 
    
         
            +
                                fsize = _get_file_size(self.fpath, unit="KB")
         
     | 
| 
      
 201 
     | 
    
         
            +
                                if fsize < self.check_size:
         
     | 
| 
      
 202 
     | 
    
         
            +
                                    # delete the wrong file
         
     | 
| 
      
 203 
     | 
    
         
            +
                                    os.remove(self.fpath)
         
     | 
| 
      
 204 
     | 
    
         
            +
                                    print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
         
     | 
| 
      
 205 
     | 
    
         
            +
                                else:
         
     | 
| 
      
 206 
     | 
    
         
            +
                                    print(f"[bold green]Sucessful to download {self.fpath}")
         
     | 
| 
      
 207 
     | 
    
         
            +
                                    already_downloaded = True
         
     | 
| 
      
 208 
     | 
    
         
            +
                            else:
         
     | 
| 
      
 209 
     | 
    
         
            +
                                self.try_times = self.try_times_each_url_max + 1
         
     | 
| 
      
 210 
     | 
    
         
            +
                                print(f"Failed to download the PDF file. Status code: {response.status_code}")
         
     | 
| 
      
 211 
     | 
    
         
            +
                                print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
         
     | 
| 
      
 212 
     | 
    
         
            +
                        except Exception as e:
         
     | 
| 
      
 213 
     | 
    
         
            +
                            print(f"Failed to download the PDF file. Error: {e}")
         
     | 
| 
      
 214 
     | 
    
         
            +
                        time.sleep(self.sleep)
         
     | 
| 
      
 215 
     | 
    
         
            +
                        if self.try_times >= self.try_times_each_url_max:
         
     | 
| 
      
 216 
     | 
    
         
            +
                            self.url_index += 1
         
     | 
| 
      
 217 
     | 
    
         
            +
                            if self.url_index >= len(self.url_list):
         
     | 
| 
      
 218 
     | 
    
         
            +
                                print("\n[bold #CD5C5C]Failed to download the PDF file.")
         
     | 
| 
      
 219 
     | 
    
         
            +
                                self.write_wrong_record()
         
     | 
| 
      
 220 
     | 
    
         
            +
                                return
         
     | 
| 
      
 221 
     | 
    
         
            +
                            if self.try_times == self.try_times_each_url_max:
         
     | 
| 
      
 222 
     | 
    
         
            +
                                print(f"Tried {self.try_times} times for {self.url_list[self.url_index-1]}.")
         
     | 
| 
      
 223 
     | 
    
         
            +
                                print("Try another URL...")
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
            def _read_excel(file, col_name=r"DOI"):
                """
                Description:
                    Read one column of an Excel file and return its usable cells.

                Parameters:
                    file: The Excel file, handed unchanged to pandas.read_excel.
                    col_name: str, The header of the column to extract. Default is 'DOI'.

                Returns:
                    list, the column values in sheet order, without the entries whose
                    string form is "nan".
                """
                column_values = pd.read_excel(file)[col_name].tolist()
                kept = []
                for value in column_values:
                    # Skip NaN placeholders (their str() is "nan"); keep everything else.
                    if str(value) == "nan":
                        continue
                    kept.append(value)
                return kept
         
     | 
| 
      
 232 
     | 
    
         
            +
             
     | 
| 
      
 233 
     | 
    
         
            +
             
     | 
| 
      
 234 
     | 
    
         
            +
            def _read_txt(file):
                """
                Description:
                    Read a text file and return its non-blank lines with surrounding
                    whitespace removed.

                Parameters:
                    file: str, The path of the text file (opened in text mode with the
                          platform default encoding).

                Returns:
                    list of str, one entry per non-blank line, in file order.
                """
                entries = []
                with open(file, "r") as handle:
                    for raw_line in handle:
                        stripped = raw_line.strip()
                        # Whitespace-only lines strip to "" and are dropped.
                        if stripped:
                            entries.append(stripped)
                return entries
         
     | 
| 
      
 240 
     | 
    
         
            +
             
     | 
| 
      
 241 
     | 
    
         
            +
             
     | 
| 
      
 242 
     | 
    
         
            +
            def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r"DOI"):
                """
                Description:
                    Download PDF files by DOI.

                    Only one DOI source is used per call. When several are given, the
                    last one in the order doi_list -> txt_file -> excel_file replaces
                    the earlier ones (the sources are not merged). When no source is
                    given at all, nothing is downloaded.

                    Before downloading, remove() is called on "wrong_record.txt" inside
                    store_path, so each run starts with a fresh failure record.

                Parameters:
                    store_path: str, The path to store the PDF files. Default is the current
                                working directory. The directory is created if it is missing.
                    doi_list: list or str, The list of DOIs (passed through ensure_list()).
                    txt_file: str, The path of the txt file that contains the DOIs.
                    excel_file: str, The path of the excel file that contains the DOIs.
                    col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.

                Returns:
                    None

                Example:
                    download5doi(doi_list='10.3389/feart.2021.698876')
                    download5doi(store_path='I:\\Delete\\ref_pdf', doi_list='10.3389/feart.2021.698876')
                    download5doi(store_path='I:\\Delete\\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
                    download5doi(store_path='I:\\Delete\\ref_pdf', txt_file='I:\\Delete\\ref_pdf\\wrong_record.txt')
                    download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx')
                    download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx', col_name='DOI')
                """
                # Resolve the target directory (cwd by default) and make sure it exists.
                target_dir = Path(str(store_path)) if store_path else Path.cwd()
                target_dir.mkdir(parents=True, exist_ok=True)
                store_path = str(target_dir)

                # Later sources deliberately overwrite earlier ones.
                if doi_list:
                    doi_list = ensure_list(doi_list)
                if txt_file:
                    doi_list = _read_txt(txt_file)
                if excel_file:
                    doi_list = _read_excel(excel_file, col_name)
                if doi_list is None:
                    # No source supplied: treat it as "nothing to download" instead of
                    # failing with a TypeError on len(None) below.
                    doi_list = []

                # The DOI list is already in memory here, so this is safe even when
                # txt_file points at the very wrong_record.txt being removed.
                remove(Path(store_path) / "wrong_record.txt")
                print(f"Downloading {len(doi_list)} PDF files...")
                for doi in track(doi_list, description="Downloading..."):
                    _Downloader(doi, store_path).download_pdf()
         
     | 
| 
      
 283 
     | 
    
         
            +
             
     | 
| 
      
 284 
     | 
    
         
            +
             
     | 
| 
      
 285 
     | 
    
         
            +
            if __name__ == "__main__":
                # Manual entry point: runs download5doi() against hard-coded absolute
                # Windows paths.
                # NOTE(review): these look like paths on the author's machine; adjust
                # them before running this module as a script elsewhere.
                store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
                # DOIs are taken from this workbook's "DOI" column (download5doi's
                # default col_name).
                excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
                download5doi(store_path, excel_file=excel_file)
         
     | 
| 
         @@ -0,0 +1,151 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env python
         
     | 
| 
      
 2 
     | 
    
         
            +
            # coding=utf-8
         
     | 
| 
      
 3 
     | 
    
         
            +
            """
         
     | 
| 
      
 4 
     | 
    
         
            +
            Author: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 5 
     | 
    
         
            +
            Date: 2024-12-01 19:32:25
         
     | 
| 
      
 6 
     | 
    
         
            +
            LastEditors: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 7 
     | 
    
         
            +
            LastEditTime: 2024-12-10 11:16:36
         
     | 
| 
      
 8 
     | 
    
         
            +
            FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\test_ua.py
         
     | 
| 
      
 9 
     | 
    
         
            +
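            Description: Helpers to validate User-Agent strings and pick a random one; run as a script to filter User_Agent-list.txt.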
         
     | 
| 
      
 10 
     | 
    
         
            +
            EditPlatform: vscode
         
     | 
| 
      
 11 
     | 
    
         
            +
            ComputerInfo: XPS 15 9510
         
     | 
| 
      
 12 
     | 
    
         
            +
            SystemInfo: Windows 11
         
     | 
| 
      
 13 
     | 
    
         
            +
            Python Version: 3.12
         
     | 
| 
      
 14 
     | 
    
         
            +
            """
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            import os
         
     | 
| 
      
 17 
     | 
    
         
            +
            import random
         
     | 
| 
      
 18 
     | 
    
         
            +
            import re
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            def is_valid_user_agent(user_agent):
                """
                Description:
                    Check whether a string has the shape of a common browser User-Agent.

                    The whole string must consist of, in order:
                      1. a leading "Name/version" token whose name is one of
                         Mozilla, Opera, Chrome, Safari, Edg or OPR;
                      2. optionally a parenthesised comment;
                      3. optionally one engine token such as "Gecko/..." or "AppleWebKit/...";
                      4. optionally any further text that ends in a known
                         "Name/version" token.

                Parameters:
                    user_agent: str, The User-Agent string to check.

                Returns:
                    bool, True when the string matches the pattern, otherwise False.
                """
                ua_regex = re.compile(
                    r"^(?:(?:Mozilla|Opera|Chrome|Safari|Edg|OPR)/[\d.]+)"
                    r"(?:\s(?:\(.*?\)))?"
                    r"(?:\s(?:Gecko|AppleWebKit|KHTML, like Gecko|Version|Edge|OPR)/[\d.]+)?"
                    r"(?:\s.*?(?:rv:|Version/|Ubuntu|Macintosh|Windows|X11|Linux|CrOS|FreeBSD|OpenBSD|NetBSD|iPhone|iPad|iPod|Android|BlackBerry|BB10|Mobile|Symbian|Windows Phone|IEMobile|Opera Mini|Opera Mobi|UCBrowser|MQQBrowser|baiduboxapp|baidubrowser|Safari|Firefox|MSIE|Trident|Edge|EdgA|Chrome|CriOS|Vivaldi|Sleipnir|Midori|ELinks|Lynx|w3m|Arora|Epiphany|Konqueror|Dillo|Netscape|SeaMonkey|K-Meleon|Camino|Iceape|Galeon|GranParadiso|Iceweasel|Firefox|Fennec|Conkeror|PaleMoon|Uzbl|QupZilla|Otter|Waterfox|Basilisk|Cyberfox|PaleMoon|GNU IceCat|GNU IceWeasel|IceCat|IceWeasel|Seamonkey|Iceape|Firefox|Epiphany|Web|Safari|Android|Mobile|BlackBerry|BB10|Tablet|Silk|Kindle|FxiOS|Focus|SamsungBrowser|browser|AppleWebKit|Puffin|DuckDuckGo|YaBrowser|Yandex|Amigo|NokiaBrowser|OviBrowser|OneBrowser|Chrome|Firefox|Safari|OPR|Coast|Mercury|Silk|Skyfire|IEMobile|Bolt|Jasmine|NativeHost|Crosswalk|TizenBrowser|SailfishBrowser|SamsungBrowser|Silk-Accelerated|UCBrowser|Quark|XiaoMi|OnePlus|Vivo|Oppo|Realme|Meizu|Lenovo|Huawei|ZTE|Alcatel|Sony|Nokia|LG|HTC|Asus|Acer|Motorola|Samsung)/[\d.]+)?$"
                )

                # match() anchors at the start; the trailing "$" in the pattern
                # anchors the end, so the entire string has to fit.
                return ua_regex.match(user_agent) is not None
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            def get_ua():
                """
                Description:
                    Pick a random User-Agent from "User_Agent-list.txt", which is looked
                    up in the directory that contains this module.

                Returns:
                    str, one randomly chosen non-blank line of that file, with
                    surrounding whitespace removed.
                """
                module_dir = os.path.dirname(os.path.abspath(__file__))
                list_path = os.path.join(module_dir, "User_Agent-list.txt")

                candidates = []
                with open(list_path, "r") as handle:
                    for raw in handle:
                        entry = raw.strip()
                        # Blank lines strip to "" and are not candidates.
                        if entry:
                            candidates.append(entry)

                return random.choice(candidates)
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            def get_ua_org(newtxtfile=None):
                """
                Description:
                    Return a random User-Agent from a built-in list and, when an output
                    path is available, also dump the whole list to that file (one
                    User-Agent per line).

                    The original implementation always wrote to a module-level
                    `newtxtfile`, which only exists while this file runs as a script,
                    so calling the function after an import raised NameError. The path
                    is now an optional argument.

                Parameters:
                    newtxtfile: str, The path of the file to write the list to. Default is
                                None, in which case a module-level `newtxtfile` is used
                                if one is defined; otherwise nothing is written.

                Returns:
                    str, one randomly chosen entry of the built-in list.
                """
                ua_list = [
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
                    "Opera/8.0 (Windows NT 5.1; U; en)",
                    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
                    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
                    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
                    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
                    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
                    "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
                    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
                    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
                    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
                    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
                    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
                    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
                    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
                    "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
                    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
                    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
                    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
                    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
                    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
                    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
                    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
                    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
                    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
                    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
                    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
                    "UCWEB7.0.2.37/28/999",
                    "NOKIA5700/UCWEB7.0.2.37/28/999",
                    "Openwave/UCWEB7.0.2.37/28/999",
                    "Openwave/UCWEB7.0.2.37/28/999",
                ]

                if newtxtfile is None:
                    # Backward compatibility: fall back to the module-level name the
                    # original code relied on (set by the __main__ section), if present.
                    newtxtfile = globals().get("newtxtfile")

                if newtxtfile is not None:
                    with open(newtxtfile, "w") as f:
                        f.writelines(line + "\n" for line in ua_list)

                return random.choice(ua_list)
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            # get_ua_org()
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
            if __name__ == "__main__":
                # Maintenance script: filter "User_Agent-list.txt" through
                # is_valid_user_agent() and write the accepted entries to
                # "ua_list_new.txt".
                # Both files are resolved relative to this module (the same place
                # get_ua() reads the list from) instead of a hard-coded absolute
                # path, so the script also works outside the original checkout.
                current_dir = os.path.dirname(os.path.abspath(__file__))
                txtfile = os.path.join(current_dir, "User_Agent-list.txt")

                with open(txtfile, "r") as f:
                    # Strip newlines and drop blank lines.
                    lines = [line.strip() for line in f if line.strip()]

                new_line = []
                for line in lines:
                    if is_valid_user_agent(line):
                        new_line.append(line)
                    else:
                        # Report every rejected entry so the list can be reviewed by hand.
                        print(f"Invalid User-Agent: {line}")

                newtxtfile = os.path.join(current_dir, "ua_list_new.txt")
                with open(newtxtfile, "w") as f:
                    for line in new_line:
                        f.write(line + "\n")
         
     | 
| 
         @@ -0,0 +1,31 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env python
         
     | 
| 
      
 2 
     | 
    
         
            +
            # coding=utf-8
         
     | 
| 
      
 3 
     | 
    
         
            +
            """
         
     | 
| 
      
 4 
     | 
    
         
            +
            Author: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 5 
     | 
    
         
            +
            Date: 2024-12-26 08:06:34
         
     | 
| 
      
 6 
     | 
    
         
            +
            LastEditors: Liu Kun && 16031215@qq.com
         
     | 
| 
      
 7 
     | 
    
         
            +
            LastEditTime: 2024-12-26 08:06:34
         
     | 
| 
      
 8 
     | 
    
         
            +
            FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\user_agent.py
         
     | 
| 
      
 9 
     | 
    
         
            +
            Description:
         
     | 
| 
      
 10 
     | 
    
         
            +
            EditPlatform: vscode
         
     | 
| 
      
 11 
     | 
    
         
            +
            ComputerInfo: XPS 15 9510
         
     | 
| 
      
 12 
     | 
    
         
            +
            SystemInfo: Windows 11
         
     | 
| 
      
 13 
     | 
    
         
            +
            Python Version: 3.12
         
     | 
| 
      
 14 
     | 
    
         
            +
            """
         
     | 
| 
      
 15 
     | 
    
         
            +
            import os
         
     | 
| 
      
 16 
     | 
    
         
            +
            import random
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            # Public API of this module: only get_ua is exported by `from ... import *`.
            __all__ = ["get_ua"]
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            # Cleaned User-Agent strings, filled on the first successful load so the
            # list file is not re-read and re-parsed on every call.
            _UA_CACHE = None


            def _load_ua_list():
                """Return the non-blank, stripped lines of User_Agent-list.txt as a tuple."""
                current_dir = os.path.dirname(os.path.abspath(__file__))
                ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")

                # Explicit encoding: the default used by open() depends on the locale,
                # so the same file could decode differently (or fail) across platforms.
                with open(ua_file_txt, "r", encoding="utf-8") as f:
                    stripped = (line.strip() for line in f)
                    # Drop line breaks and blank lines.
                    return tuple(line for line in stripped if line)


            def get_ua():
                """Return a randomly chosen User-Agent string.

                The candidates are the non-blank lines of ``User_Agent-list.txt``, which
                is looked up in the directory containing this module. The list is read
                once and then reused by later calls.

                Returns:
                    str: one entry of the list, with surrounding whitespace removed.

                Raises:
                    OSError: if the list file cannot be opened.
                    IndexError: if the list file contains no non-blank lines.
                """
                global _UA_CACHE

                if not _UA_CACHE:
                    ua_list = _load_ua_list()
                    if not ua_list:
                        # Same exception type random.choice would raise on an empty
                        # sequence, but with a message that names the real cause.
                        raise IndexError("User-Agent list file contains no entries")
                    # Only a non-empty result is cached, so a repaired file is picked up.
                    _UA_CACHE = ua_list

                return random.choice(_UA_CACHE)
         
     |