oafuncs 0.0.78__py2.py3-none-any.whl → 0.0.80__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/__init__.py +15 -12
- oafuncs/oa_down/User_Agent-list.txt +13 -2837
- oafuncs/oa_down/hycom_3hourly.py +184 -63
- oafuncs/oa_down/test.py +52 -28
- oafuncs/oa_down/test_ua.py +151 -0
- oafuncs/oa_draw.py +45 -1
- oafuncs/oa_s/__init__.py +23 -0
- oafuncs/oa_s/oa_cmap.py +163 -0
- oafuncs/oa_s/oa_data.py +187 -0
- oafuncs/oa_s/oa_draw.py +451 -0
- oafuncs/oa_s/oa_file.py +332 -0
- oafuncs/oa_s/oa_help.py +39 -0
- oafuncs/oa_s/oa_nc.py +410 -0
- oafuncs/oa_s/oa_python.py +107 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/__init__.py" +26 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_cmap.py" +163 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_data.py" +187 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_down/__init__.py" +20 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_down/hycom_3hourly.py" +1176 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_down/literature.py" +332 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_down/test_ua.py" +151 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_draw.py" +451 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_file.py" +332 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_help.py" +39 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_nc.py" +410 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_python.py" +107 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_sign/__init__.py" +21 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_sign/meteorological.py" +168 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_sign/ocean.py" +158 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_sign/scientific.py" +139 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_tool/__init__.py" +18 -0
- "oafuncs - /321/205/320/231/320/277/321/206/320/254/320/274/oa_tool/email.py" +114 -0
- {oafuncs-0.0.78.dist-info → oafuncs-0.0.80.dist-info}/METADATA +1 -1
- oafuncs-0.0.80.dist-info/RECORD +51 -0
- oafuncs-0.0.80.dist-info/top_level.txt +2 -0
- oafuncs-0.0.78.dist-info/RECORD +0 -24
- oafuncs-0.0.78.dist-info/top_level.txt +0 -1
- {oafuncs-0.0.78.dist-info → oafuncs-0.0.80.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.78.dist-info → oafuncs-0.0.80.dist-info}/WHEEL +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -18,13 +18,15 @@ import os
 import random
 import time
 import warnings
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+from threading import Lock
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import requests
+from bs4 import BeautifulSoup
 from rich import print
 from rich.progress import Progress
 
@@ -53,17 +55,17 @@ data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
 # Submitting an out-of-range time on the website returns the dataset's actual time range, which corrects the ranges below
 # Only the GLBv0.08 93.0 time range has been corrected so far, down to the hour
 # Other datasets' hours default to starting at 00 and ending at 21 for now
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "
-data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
+data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
 data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
 # GLBu0.08
-data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "
+data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
 # GLBy0.08
-data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "
+data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "20300904"}
 
 # variable
 variable_info = {
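A note on the corrected values above: they are fixed-width, zero-padded `YYYYMMDDHH` stamps, so plain string comparison is enough to test whether a requested time falls inside a dataset's range. A minimal sketch (not part of the package; the helper name is hypothetical):

```python
# Hypothetical sketch: range membership for YYYYMMDDHH stamps via string comparison,
# which is valid because the format is fixed-width and zero-padded.
time_range = {"time_start": "1994010112", "time_end": "2015123109"}  # GLBv0.08 53.X

def in_range(ymdh: str, trange: dict) -> bool:
    return trange["time_start"] <= ymdh <= trange["time_end"]

print(in_range("2000051500", time_range))  # True
print(in_range("2016010100", time_range))  # False
```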
@@ -139,10 +141,11 @@ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
 uv3z_930_y = {}
 ts3z_930_y = {}
 ssh_930_y = {}
-for y_930_y in range(2018,
+for y_930_y in range(2018, 2030):
     uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
     ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
     ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
+# GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
 url_930_y = {
     "uv3z": uv3z_930_y,
     "ts3z": ts3z_930_y,
@@ -370,7 +373,16 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
     if have_data:
         for d, v, trange in zip(d_list, v_list, trange_list):
             print(f"[bold blue]{d} {v} {trange}")
-
+        if is_single_time:
+            return True
+        else:
+            base_url_s = get_base_url(d_list[0], v_list[0], "u", str(time_start))
+            base_url_e = get_base_url(d_list[0], v_list[0], "u", str(time_end))
+            if base_url_s == base_url_e:
+                return True
+            else:
+                print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
+                return False
     else:
         print(f"[bold red]{time_input_str} is not in any dataset and version")
         return False
@@ -454,7 +466,8 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
     return dataset_name_out, version_name_out
 
 
-def get_base_url(dataset_name, version_name, var,
+def get_base_url(dataset_name, version_name, var, ymdh_str):
+    year_str = int(ymdh_str[:4])
     url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
     classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
     if classification_method == "year_different":
@@ -470,6 +483,12 @@ def get_base_url(dataset_name, version_name, var, year_str):
         if base_url is None:
             print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
     elif classification_method == "var_year_different":
+        if dataset_name == "GLBy0.08" and version_name == "93.0":
+            mdh_str = ymdh_str[4:]
+            # GLBy0.08 93.0
+            # data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
+            if mdh_str <= "010109":
+                year_str = int(ymdh_str[:4]) - 1
         base_url = None
         for key, value in var_group.items():
             if var in value:
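The new branch above handles the GLBy0.08 93.0 file layout, where each yearly file runs from Jan 1 12:00 of its year through Jan 1 09:00 of the next year, so early-January stamps still belong to the previous year's file. A standalone sketch of the same rule (hypothetical helper, for illustration only):

```python
# Hypothetical reimplementation of the year-rollover rule, not the package's own function.
def file_year(ymdh_str: str) -> int:
    year = int(ymdh_str[:4])
    # A stamp at or before MM-DD-HH "010109" falls in the previous year's file.
    if ymdh_str[4:] <= "010109":
        year -= 1
    return year

print(file_year("2019010106"))  # 2018: still covered by the 2018 file
print(file_year("2019010112"))  # 2019: first record of the 2019 file
```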
@@ -480,8 +499,8 @@ def get_base_url(dataset_name, version_name, var, year_str):
     return base_url
 
 
-def get_submit_url(dataset_name, version_name, var,
-    base_url = get_base_url(dataset_name, version_name, var,
+def get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
+    base_url = get_base_url(dataset_name, version_name, var, ymdh_str)
     if isinstance(query_dict["var"], str):
         query_dict["var"] = [query_dict["var"]]
     target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
@@ -494,10 +513,37 @@ def clear_existing_file(file_full_path):
         print(f"{file_full_path} has been removed")
 
 
+def _get_file_size(file_path, unit="KB"):
+    # Check whether the file exists
+    if not os.path.exists(file_path):
+        return "文件不存在"
+
+    # Get the file size (in bytes)
+    file_size = os.path.getsize(file_path)
+
+    # Unit conversion table
+    unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
+
+    # Validate the requested unit
+    if unit not in unit_dict:
+        return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
+
+    # Convert the file size to the requested unit
+    converted_size = file_size / unit_dict[unit]
+
+    return converted_size
+
+
 def check_existing_file(file_full_path):
     if os.path.exists(file_full_path):
         print(f"[bold #FFA54F]{file_full_path} exists")
-
+        fsize = _get_file_size(file_full_path)
+        if fsize < 5:
+            print(f"[bold #FFA54F]{file_full_path} may be incomplete\nFile size: {fsize:.2f} KB")
+            # clear_existing_file(file_full_path)
+            return False
+        else:
+            return True
     else:
         # print(f'{file_full_path} does not exist')
         return False
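With this change, `check_existing_file` treats any existing file under 5 KB as a likely failed download (typically an error page saved in place of the NetCDF data) and reports it as missing so it will be fetched again. Note that `_get_file_size` returns a string rather than a number when the file is absent, so the `fsize < 5` comparison relies on the enclosing `os.path.exists` check. A minimal sketch of the same heuristic (helper name hypothetical):

```python
import os

# Sketch only, assuming the 5 KB threshold used by check_existing_file;
# a real 3-hourly HYCOM NetCDF file should be far larger than this.
def looks_complete(path: str, min_kb: float = 5.0) -> bool:
    if not os.path.exists(path):
        return False
    size_kb = os.path.getsize(path) / 1024
    # Tiny files are usually an HTML error page saved instead of the data.
    return size_kb >= min_kb
```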
@@ -512,10 +558,13 @@ def get_ua():
     # Strip newlines and blank lines
     ua_list = [line.strip() for line in ua_list if line.strip()]
 
+    # if current_platform == 'Linux':
+    #     ua_list = [line for line in ua_list if 'Linux' in line]
+
     return random.choice(ua_list)
 
 
-def
+def get_proxy_file():
     # Get the absolute path of the current script
     script_dir = os.path.dirname(os.path.abspath(__file__))
     # Build the absolute path of ip.txt
@@ -531,15 +580,63 @@ def get_proxy():
     return proxies
 
 
-def
+def scrape_and_categorize_proxies(choose_protocol="http"):
+    url = "https://topproxylinks.com/"
+    # Send an HTTP request to fetch the page content
+    response = requests.get(url)
+    # Parse the page with BeautifulSoup
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    # Initialize a dict to store proxies by protocol
+    proxies_dict = {"http": [], "socks4": [], "socks5": []}
+
+    # Find all rows in the table
+    tbody = soup.find("tbody")
+
+    if tbody:
+        for row in tbody.find_all("tr"):
+            # Extract the protocol, proxy, and country cells
+            cells = row.find_all("td")
+            protocol = cells[0].text.strip().lower()
+            proxy = cells[1].text.strip()
+
+            # Store the proxy under its protocol
+            if protocol in proxies_dict:
+                proxies_dict[protocol].append(proxy)
+
+    if choose_protocol in proxies_dict:
+        proxies_list = proxies_dict[choose_protocol]
+    else:
+        proxies_list = proxies_dict["http"]
+
+    return proxies_list
+
+
+def get_proxy():
+    ip_list = scrape_and_categorize_proxies(choose_protocol="http")
+    choose_ip = random.choice(ip_list)
+    proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
+    print(f"Using proxy: {proxies}")
+    return proxies
+
+
+def download_file(target_url, store_path, file_name, check=False):
+    # Check if the file exists
+    fname = Path(store_path) / file_name
+    if check:
+        if check_existing_file(fname):
+            count_dict["skip"] += 1
+            return
+    clear_existing_file(fname)
+
+    # -----------------------------------------------
     print(f"[bold #f0f6d0]Requesting {file_name}...")
     # Create a session
     s = requests.Session()
     download_success = False
     request_times = 0
-    filename = Path(store_path) / file_name
 
-    def calculate_wait_time(time_str):
+    def calculate_wait_time(time_str, target_url):
         import re
 
         # Define a regex matching times in YYYYMMDDHH format
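`get_proxy` returns the mapping shape that `requests` expects for its `proxies` argument, reusing the same HTTP proxy for both schemes. A sketch of how such a mapping is consumed (the address below is hypothetical, and free scraped proxies are frequently dead, so real code should expect failures):

```python
import requests

# Hypothetical proxy address; get_proxy() would supply a scraped one.
proxies = {"http": "http://203.0.113.10:8080", "https": "http://203.0.113.10:8080"}

try:
    resp = requests.get("https://ncss.hycom.org/", proxies=proxies, timeout=30)
    resp.raise_for_status()
except requests.RequestException as e:
    # Fall back to a direct request, or pick another proxy from the list.
    print(f"Proxy request failed: {e}")
```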
@@ -561,16 +658,15 @@ def dlownload_file(target_url, store_path, file_name, check=False):
             delta_t = delta_t / 3 + 1
         else:
             delta_t = 1
+        # Wait at most 5 minutes per variable: too short and the request may fail, too long wastes time
+        num_var = int(target_url.count("var="))
+        if num_var <= 0:
+            num_var = 1
+        return int(delta_t * 5 * 60 * num_var)
 
-
-
-    max_timeout = calculate_wait_time(file_name)
+    max_timeout = calculate_wait_time(file_name, target_url)
+    print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
 
-    if check:
-        if check_existing_file(filename):
-            count_dict['skip'] += 1
-            return
-    clear_existing_file(filename)
     # print(f'Download_start_time: {datetime.datetime.now()}')
     download_time_s = datetime.datetime.now()
     order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
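The timeout now scales with both the requested time span and the number of `var=` parameters in the submitted URL, at roughly five minutes per variable per unit of `delta_t`. A standalone rewrite of just that arithmetic (assuming `delta_t` has already been derived from the file name, as in the diff):

```python
# Illustrative sketch of the new timeout rule; names are local to this snippet.
def max_timeout(delta_t: float, target_url: str) -> int:
    # delta_t: number of 3-hourly steps plus one, as computed upstream
    num_var = target_url.count("var=") or 1  # at least one variable
    return int(delta_t * 5 * 60 * num_var)   # 5 minutes per step per variable

url = "https://ncss.hycom.org/...?var=water_u&var=water_v&north=45"
print(max_timeout(1, url))  # two variables, single step -> 600 seconds
```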
@@ -593,11 +689,15 @@ def dlownload_file(target_url, store_path, file_name, check=False):
                 with open(filename, 'wb') as f:
                     f.write(response.content) """
 
-
+            if find_proxy:
+                proxies = get_proxy()
+                response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
+            else:
+                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
             response.raise_for_status()  # raise an HTTPError if the response status is not 200
 
             # Save the file
-            with open(
+            with open(fname, "wb") as f:
                 print(f"[bold #96cbd7]Downloading {file_name}...")
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
@@ -605,12 +705,12 @@ def dlownload_file(target_url, store_path, file_name, check=False):
 
             f.close()
 
-            # print(f'\rFile {
-            if os.path.exists(
+            # print(f'\rFile {fname} downloaded successfully', end="")
+            if os.path.exists(fname):
                 download_success = True
                 download_time_e = datetime.datetime.now()
                 download_delta = download_time_e - download_time_s
-                print(f"[#3dfc40]File [bold #dfff73]{
+                print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
                 count_dict["success"] += 1
                 # print(f'Download_end_time: {datetime.datetime.now()}')
 
@@ -671,7 +771,8 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
 
 
 def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
-    year_str = str(download_time)[:4]
+    # year_str = str(download_time)[:4]
+    ymdh_str = str(download_time)
     if depth is not None and level_num is not None:
         print("Please ensure the depth or level_num is None")
         print("Progress will use the depth")
@@ -683,10 +784,10 @@ def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max
         print(f"Data of single level ({level_num}) will be downloaded...")
         which_mode = "level"
     else:
-        print("Full depth or full level data will be downloaded...")
+        # print("Full depth or full level data will be downloaded...")
         which_mode = "full"
     query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
-    submit_url = get_submit_url(dataset_name, version_name, var,
+    submit_url = get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
     return submit_url
 
 
@@ -716,7 +817,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
         file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
         if download_time_end is not None:
             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
-
+        download_file(submit_url, store_path, file_name, check)
     else:
         varlist = [_ for _ in var]
         for key, value in var_group.items():
@@ -740,7 +841,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
             file_name = f"HYCOM_{key}_{download_time}.nc"
             if download_time_end is not None:
                 file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
-
+            download_file(submit_url, store_path, file_name, check)
 
 
 def convert_full_name_to_short_name(full_name):
@@ -767,6 +868,22 @@ def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_ma
     prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
 
 
+def done_callback(future, progress, task, total, counter_lock):
+    """
+    # Callback for the parallel download tasks
+    # Required for parallel downloading; calling direct_download directly in parallel causes problems
+
+    Callback: invoked when a task finishes, so the progress bar can be updated promptly.
+    Here, done_callback advances the progress bar each time a task completes,
+    so per-task completion is visible while tasks run, instead of only appearing after all tasks finish.
+    """
+
+    global parallel_counter
+    with counter_lock:
+        parallel_counter += 1
+        progress.update(task, advance=1, description=f"[cyan]Downloading... {parallel_counter}/{total}")
+
+
 def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
     """
     Description:
|
|
791
908
|
None
|
792
909
|
"""
|
793
910
|
ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
|
911
|
+
if num_workers is not None and num_workers > 1: # 如果使用多线程下载,用于进度条显示
|
912
|
+
global parallel_counter
|
913
|
+
parallel_counter = 0
|
914
|
+
counter_lock = Lock() # 创建一个锁,线程安全的计数器
|
794
915
|
if ymdh_time_s == ymdh_time_e:
|
795
916
|
prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name)
|
796
917
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
@@ -808,8 +929,10 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
             # Parallel mode
             with ThreadPoolExecutor(max_workers=num_workers) as executor:
                 futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
-                for i, future in enumerate(futures):
-                    future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}"))
+                """ for i, future in enumerate(futures):
+                    future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")) """
+                for feature in as_completed(futures):
+                    done_callback(feature, progress, task, len(time_list), counter_lock)
         else:
             new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
             total_num = len(new_time_list)
@@ -824,8 +947,10 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
             # Parallel mode
             with ThreadPoolExecutor(max_workers=num_workers) as executor:
                 futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
-                for i, future in enumerate(futures):
-                    future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}"))
+                """ for i, future in enumerate(futures):
+                    future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")) """
+                for feature in as_completed(futures):
+                    done_callback(feature, progress, task, len(time_list), counter_lock)
     else:
         print("Please ensure the time_s is no more than time_e")
 
@@ -907,31 +1032,30 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         time_e = str(time_e)
         if len(time_e) == 8:
             time_e += "21"
-
+
     global count_dict
-    count_dict = {
-
-
-
-
-
-
-    }
+    count_dict = {"success": 0, "fail": 0, "skip": 0, "no_data": 0, "total": 0, "no_data_list": []}
+
+    """ global current_platform
+    current_platform = platform.system() """
+
+    global find_proxy
+    find_proxy = False
 
     download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
-
+
     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
-
+
     print("[bold #ecdbfe]-" * 160)
     print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
-    if count_dict[
+    if count_dict["fail"] > 0:
         print("[bold #be5528]Please try again to download the failed data later")
-    if count_dict[
-        if count_dict[
+    if count_dict["no_data"] > 0:
+        if count_dict["no_data"] == 1:
             print(f"[bold #f90000]There is {count_dict['no_data']} data that does not exist in any dataset and version")
         else:
             print(f"[bold #f90000]These are {count_dict['no_data']} data that do not exist in any dataset and version")
-        for no_data in count_dict[
+        for no_data in count_dict["no_data_list"]:
             print(f"[bold #d81b60]{no_data}")
     print("[bold #ecdbfe]-" * 160)
 
@@ -987,8 +1111,8 @@ def how_to_use():
 
 if __name__ == "__main__":
     # help(hycom3h.download)
-    time_s, time_e = "
-    merge_name = "
+    time_s, time_e = "2023010100", "2023123121"
+    merge_name = f"{time_s}_{time_e}"  # name of the merged file
     root_path = r"G:\Data\HYCOM\3hourly"
     location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
     download_dict = {
@@ -1003,16 +1127,13 @@ if __name__ == "__main__":
         "salinity_bottom": {"simple_name": "salt_b", "download": 0},
     }
 
-    var_list = []
-    for var_name in download_dict.keys():
-        if download_dict[var_name]["download"] == 1:
-            var_list.append(var_name)
+    var_list = [var_name for var_name in download_dict.keys() if download_dict[var_name]["download"]]
 
     # set depth or level, only one can be True
    # if you wanna download all depth or level, set both False
     depth = None  # or 0-5000 meters
     level = None  # or 1-40 levels
-    num_workers =
+    num_workers = 3
 
     check = True
     ftimes = 1
oafuncs/oa_down/test.py
CHANGED
@@ -1,45 +1,45 @@
 #!/usr/bin/env python
 # coding=utf-8
-
+"""
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-12-01 19:32:25
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-12-
+LastEditTime: 2024-12-10 11:16:36
 FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\test.py
-Description:
+Description: 
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
 SystemInfo: Windows 11
 Python Version: 3.12
-
+"""
 
 import os
 import random
+import re
 
-txtfile = r'E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt'
 
-
-
-#
-
-
-
-
-
-print(lines[i])
+def is_valid_user_agent(user_agent):
+    # A simple regular expression to check the format of a User Agent
+    # It checks whether the User Agent contains the common browser-info patterns
+    pattern = re.compile(
+        r"^(?:(?:Mozilla|Opera|Chrome|Safari|Edg|OPR)/[\d.]+)"
+        r"(?:\s(?:\(.*?\)))?"
+        r"(?:\s(?:Gecko|AppleWebKit|KHTML, like Gecko|Version|Edge|OPR)/[\d.]+)?"
+        r"(?:\s.*?(?:rv:|Version/|Ubuntu|Macintosh|Windows|X11|Linux|CrOS|FreeBSD|OpenBSD|NetBSD|iPhone|iPad|iPod|Android|BlackBerry|BB10|Mobile|Symbian|Windows Phone|IEMobile|Opera Mini|Opera Mobi|UCBrowser|MQQBrowser|baiduboxapp|baidubrowser|Safari|Firefox|MSIE|Trident|Edge|EdgA|Chrome|CriOS|Vivaldi|Sleipnir|Midori|ELinks|Lynx|w3m|Arora|Epiphany|Konqueror|Dillo|Netscape|SeaMonkey|K-Meleon|Camino|Iceape|Galeon|GranParadiso|Iceweasel|Firefox|Fennec|Conkeror|PaleMoon|Uzbl|QupZilla|Otter|Waterfox|Basilisk|Cyberfox|PaleMoon|GNU IceCat|GNU IceWeasel|IceCat|IceWeasel|Seamonkey|Iceape|Firefox|Epiphany|Web|Safari|Android|Mobile|BlackBerry|BB10|Tablet|Silk|Kindle|FxiOS|Focus|SamsungBrowser|browser|AppleWebKit|Puffin|DuckDuckGo|YaBrowser|Yandex|Amigo|NokiaBrowser|OviBrowser|OneBrowser|Chrome|Firefox|Safari|OPR|Coast|Mercury|Silk|Skyfire|IEMobile|Bolt|Jasmine|NativeHost|Crosswalk|TizenBrowser|SailfishBrowser|SamsungBrowser|Silk-Accelerated|UCBrowser|Quark|XiaoMi|OnePlus|Vivo|Oppo|Realme|Meizu|Lenovo|Huawei|ZTE|Alcatel|Sony|Nokia|LG|HTC|Asus|Acer|Motorola|Samsung)/[\d.]+)?$"
+    )
 
-
-
-
-
+    # Match the User Agent string against the regex
+    if pattern.match(user_agent):
+        return True
+    else:
+        return False
 
 
 def get_ua():
     current_dir = os.path.dirname(os.path.abspath(__file__))
-    ua_file_txt = os.path.join(current_dir,
+    ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
 
-    with open(ua_file_txt,
+    with open(ua_file_txt, "r") as f:
         ua_list = f.readlines()
     # Strip newlines and blank lines
     ua_list = [line.strip() for line in ua_list if line.strip()]
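`is_valid_user_agent` accepts a string only if it starts with a known product token such as `Mozilla/5.0` and, optionally, ends with one of the recognized engine or browser tokens followed by a version. A quick check, assuming the function is defined as in the diff above:

```python
ok = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
print(is_valid_user_agent(ok))                  # True: Mozilla/... prefix, Safari/537.36 suffix
print(is_valid_user_agent("not a user agent"))  # False: no product/version prefix
```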
@@ -47,9 +47,6 @@ def get_ua():
     return random.choice(ua_list)
 
 
-print(get_ua())
-
-
 def get_ua_org():
     ua_list = [
         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
@@ -115,13 +112,40 @@ def get_ua_org():
         "NOKIA5700/UCWEB7.0.2.37/28/999",
         "Openwave/UCWEB7.0.2.37/28/999",
         "Openwave/UCWEB7.0.2.37/28/999",
-
     ]
-    with open(newtxtfile,
+    with open(newtxtfile, "w") as f:
         for line in ua_list:
-            f.write(line +
+            f.write(line + "\n")
     # print(f'Using User-Agent: {ua}')
     ua = random.choice(ua_list)
     return ua
 
-
+
+# get_ua_org()
+
+if __name__ == "__main__":
+    txtfile = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt"
+
+    with open(txtfile, "r") as f:
+        lines = f.readlines()
+    # Strip newlines and blank lines
+    lines = [line.strip() for line in lines if line.strip()]
+    """ new_line = []
+    for i in range(len(lines)):
+        if '/' in lines[i]:
+            new_line.append(lines[i])
+        else:
+            print(lines[i]) """
+
+    new_line = []
+    for line in lines:
+        if is_valid_user_agent(line):
+            # print(line)
+            new_line.append(line)
+        else:
+            print(f"Invalid User-Agent: {line}")
+
+    newtxtfile = r"E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\ua_list_new.txt"
+    with open(newtxtfile, "w") as f:
+        for line in new_line:
+            f.write(line + "\n")