oafuncs 0.0.98.3-py3-none-any.whl → 0.0.98.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/parallel.py +158 -509
- oafuncs/_script/parallel_test.py +14 -0
- oafuncs/oa_down/User_Agent-list.txt +1 -1611
- oafuncs/oa_down/hycom_3hourly.py +109 -75
- oafuncs/oa_down/hycom_3hourly_20250416.py +1191 -0
- oafuncs/oa_down/test_ua.py +27 -138
- oafuncs/oa_tool.py +118 -30
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/METADATA +2 -1
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/RECORD +12 -11
- oafuncs/_script/parallel_example_usage.py +0 -83
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -28,6 +28,8 @@ import netCDF4 as nc
 import numpy as np
 import pandas as pd
 import requests
+from requests.adapters import HTTPAdapter
+import httpx
 import xarray as xr
 from rich import print
 from rich.progress import Progress
@@ -419,8 +421,10 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
     print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
         if match_time is None:
+            dv_num = 1
             for d, v, trange in zip(d_list, v_list, trange_list):
-                print(f"[bold blue]{d} {v} {trange}")
+                print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
+                dv_num += 1
             if is_single_time:
                 return True
             else:
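With the numbering added here, each matching dataset/version pair is printed on its own indexed line, e.g. "1 -> GLBv0.08 - 53.X : 1994010112-2015123109" (illustrative identifiers and time range, not values taken from this diff).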
@@ -611,7 +615,7 @@ def _get_mean_size_move(same_file, current_file):
         size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]

         if abs(size_difference_ratio) > tolerance_ratio:
-            if check_nc(current_file,print_messages=False):
+            if check_nc(current_file, print_messages=False):
                 fsize_dict[same_file]["size_list"] = [current_file_size]
                 fsize_dict[same_file]["mean_size"] = current_file_size
             else:
@@ -662,6 +666,98 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)


+def _download_within_python_requests(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Session configuration
+    session = requests.Session()
+    adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+
+    # Timeout and retry config
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    order_terms = ["1st", "2nd", "3rd"]
+    download_start = datetime.datetime.now()
+    max_attempts = 5
+
+    for attempt in range(max_attempts):
+        if attempt > 0:
+            retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
+            print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
+            time.sleep(2 + random.uniform(0, 2))
+
+        timeout = random.randint(max_timeout // 5, max_timeout)
+        print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+
+        try:
+            headers = {"User-Agent": get_ua()}
+            with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
+                response.raise_for_status()
+                print(f"[bold #96cbd7]Downloading {file_name} ...")
+                with open(fname, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=32 * 1024):
+                        if chunk:
+                            f.write(chunk)
+            elapsed = datetime.datetime.now() - download_start
+            print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+            count_dict["success"] += 1
+            return
+        except Exception as e:
+            if hasattr(e, "response") and getattr(e.response, "status_code", None):
+                err_msg = f"HTTP {e.response.status_code} Error"
+            elif isinstance(e, requests.exceptions.Timeout):
+                err_msg = "Timeout Error"
+            elif isinstance(e, requests.exceptions.ConnectionError):
+                err_msg = "Connection Error"
+            elif isinstance(e, requests.exceptions.RequestException):
+                err_msg = "Request Error"
+            else:
+                err_msg = "Unexpected Error"
+            print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
+
+    print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
+    count_dict["fail"] += 1
+
+
+
+def _download_within_python(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Create a synchronous httpx client
+    limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
+    transport = httpx.HTTPTransport(retries=3)
+    client = httpx.Client(limits=limits, transport=transport, timeout=None)
+
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    timeout = random.randint(max_timeout // 2, max_timeout)
+    download_start = datetime.datetime.now()
+
+    print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+    headers = {"User-Agent": get_ua()}
+
+    try:
+        response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
+        response.raise_for_status()
+        print(f"[bold #96cbd7]Downloading {file_name} ...")
+        with open(fname, "wb") as f:
+            for chunk in response.iter_bytes(32 * 1024):
+                if chunk:
+                    f.write(chunk)
+        elapsed = datetime.datetime.now() - download_start
+        print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+        count_dict["success"] += 1
+    except Exception as e:
+        err_type = type(e).__name__
+        print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
+        print(f"[bold #ffe5c0]Target URL: \n{target_url}")
+        count_dict["fail"] += 1
+    finally:
+        client.close()
+
+
 def _download_file(target_url, store_path, file_name, cover=False):
     fname = Path(store_path) / file_name
     file_name_split = file_name.split("_")
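Both helpers added above follow the same pattern: build a pooled HTTP client, derive a timeout ceiling from the number of "var=" parameters in the request URL, write the response body to disk in 32 KiB chunks, and update the module-level count_dict. A minimal standalone sketch of the httpx variant is shown below; the URL, output file name, and User-Agent string are hypothetical placeholders, not values taken from the package.

# Minimal sketch of the httpx download pattern used by _download_within_python.
# target_url, out_file, and the User-Agent are hypothetical placeholders.
import random

import httpx

target_url = "https://example.com/thredds/ncss/dataset?var=water_u&var=water_v"  # placeholder
out_file = "subset.nc"  # placeholder

limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
transport = httpx.HTTPTransport(retries=3)  # retries apply to connection errors

num_var = max(target_url.count("var="), 1)               # 2 variables requested here
max_timeout = 5 * 30 * num_var                           # 300 seconds
timeout = random.randint(max_timeout // 2, max_timeout)  # randomized per request

with httpx.Client(limits=limits, transport=transport, timeout=timeout) as client:
    response = client.get(target_url, headers={"User-Agent": "Mozilla/5.0"}, follow_redirects=True)
    response.raise_for_status()
    with open(out_file, "wb") as f:
        for chunk in response.iter_bytes(32 * 1024):
            if chunk:
                f.write(chunk)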
@@ -699,69 +795,7 @@ def _download_file(target_url, store_path, file_name, cover=False):
         _clear_existing_file(fname)

     if not use_idm:
-
-        s = requests.Session()
-        download_success = False
-        request_times = 0
-
-        def calculate_wait_time(time_str, target_url):
-            time_pattern = r"\d{10}"
-            times_in_str = re.findall(time_pattern, time_str)
-            num_times_str = len(times_in_str)
-
-            if num_times_str > 1:
-                delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-                delta_t = delta_t.total_seconds() / 3600
-                delta_t = delta_t / 3 + 1
-            else:
-                delta_t = 1
-            num_var = int(target_url.count("var="))
-            if num_var <= 0:
-                num_var = 1
-            return int(delta_t * 5 * 60 * num_var)
-
-        max_timeout = calculate_wait_time(file_name, target_url)
-        print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-
-        download_time_s = datetime.datetime.now()
-        order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-        while not download_success:
-            if request_times >= 10:
-                print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-                count_dict["fail"] += 1
-                break
-            if request_times > 0:
-                print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
-            try:
-                headers = {"User-Agent": get_ua()}
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
-                response.raise_for_status()
-                with open(fname, "wb") as f:
-                    print(f"[bold #96cbd7]Downloading {file_name} ...")
-                    for chunk in response.iter_content(chunk_size=1024):
-                        if chunk:
-                            f.write(chunk)
-
-                f.close()
-
-                if os.path.exists(fname):
-                    download_success = True
-                    download_time_e = datetime.datetime.now()
-                    download_delta = download_time_e - download_time_s
-                    print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                    count_dict["success"] += 1
-
-            except requests.exceptions.HTTPError as errh:
-                print(f"Http Error: {errh}")
-            except requests.exceptions.ConnectionError as errc:
-                print(f"Error Connecting: {errc}")
-            except requests.exceptions.Timeout as errt:
-                print(f"Timeout Error: {errt}")
-            except requests.exceptions.RequestException as err:
-                print(f"OOps: Something Else: {err}")
-
-            time.sleep(3)
-            request_times += 1
+        _download_within_python(file_name, target_url, fname)
     else:
         idm_downloader(target_url, store_path, file_name, given_idm_engine)
         idm_download_list.append(fname)
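The removed calculate_wait_time helper derived the timeout ceiling from the two 10-digit timestamps embedded in the file name, while the new helpers above use only the variable count. A small worked comparison under hypothetical inputs:

# Hypothetical comparison of the old and new timeout ceilings.
import datetime
import re

file_name = "HYCOM_water_u_2021010100-2021010200.nc"  # hypothetical, 24 h span
target_url = "https://example.com/ncss?var=water_u"   # hypothetical, one "var="

# Old logic (removed): the ceiling grows with the time span in the file name.
t0, t1 = re.findall(r"\d{10}", file_name)
hours = (datetime.datetime.strptime(t1, "%Y%m%d%H") - datetime.datetime.strptime(t0, "%Y%m%d%H")).total_seconds() / 3600
delta_t = hours / 3 + 1                              # 24 / 3 + 1 = 9
num_var = max(target_url.count("var="), 1)           # 1
old_max_timeout = int(delta_t * 5 * 60 * num_var)    # 2700 seconds

# New logic: a flat 150 seconds per requested variable.
new_max_timeout = 5 * 30 * num_var                   # 150 seconds

print(old_max_timeout, new_max_timeout)              # 2700 150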
@@ -992,7 +1026,7 @@ def download(

     Returns:
         None
-
+
     Example:
         >>> download(
             variables='u',
@@ -1088,7 +1122,7 @@ def download(

     if validate_time is not None:
         workers = 1
-        print(
+        print("*" * mark_len)
         print("[bold red]Only checking the time of existing files.")
         bar_desc = "Checking time ..."

@@ -1158,20 +1192,20 @@ if __name__ == "__main__":

    options = {
        "variables": var_list,
-        "start_time": "
-        "end_time": "
-        "output_dir": r"
+        "start_time": "2018010100",
+        "end_time": "2021010100",
+        "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
        "lon_min": 105,
-        "lon_max": 
-        "lat_min": 
+        "lon_max": 135,
+        "lat_min": 10,
        "lat_max": 45,
        "workers": 1,
        "overwrite": False,
        "depth": None,
        "level": None,
-        "validate_time": 
-        "idm_path": r
-        "interval_hours": 
+        "validate_time": None,
+        # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
+        "interval_hours": 24,
    }

    if single_var: