PyPI - oafuncs - Versions diffs - 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl - Mend

oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

oafuncs/oa_down/hycom_3hourly.py CHANGED Viewed

@@ -13,7 +13,9 @@ SystemInfo: Windows 11
 Python Version: 3.12
 """
+import asyncio
 import datetime
+import logging
 import os
 import random
 import re
@@ -22,17 +24,15 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
+from oafuncs.oa_tool import pbar
+import httpx
 import matplotlib.pyplot as plt
 import netCDF4 as nc
 import numpy as np
 import pandas as pd
-import requests
-from requests.adapters import HTTPAdapter
-import httpx
 import xarray as xr
 from rich import print
-from rich.progress import Progress
 from oafuncs.oa_down.idm import downloader as idm_downloader
 from oafuncs.oa_down.user_agent import get_ua
@@ -40,6 +40,9 @@ from oafuncs.oa_file import file_size
 from oafuncs.oa_nc import check as check_nc
 from oafuncs.oa_nc import modify as modify_nc
+logging.getLogger("httpx").setLevel(logging.WARNING)  # 关闭 httpx 的 INFO 日志，只显示 WARNING 及以上
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 __all__ = ["draw_time_range", "download"]
@@ -416,11 +419,9 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
                     trange_list.append(f"{time_s}-{time_e}")
                     have_data = True
-    # 输出结果
-    if match_time is None:
-        print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
         if match_time is None:
+            print(f"[bold red]Time {time_input_str} included in:")
             dv_num = 1
             for d, v, trange in zip(d_list, v_list, trange_list):
                 print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
@@ -436,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
                 print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
                 return False
     else:
-        print(f"[bold red]{time_input_str} is not in any dataset and version")
+        print(f"[bold red]Time {time_input_str} has no data")
         return False
@@ -511,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
     if dataset_name_out is not None and version_name_out is not None:
         if match_time is None:
-            print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
+            # print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
+            print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
     # 如果没有找到匹配的数据集和版本，会返回 None
     return dataset_name_out, version_name_out
@@ -666,140 +668,138 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)
-def _download_within_python_requests(file_name, target_url, fname):
-    print(f"[bold #f0f6d0]Requesting {file_name} ...")
-    # Session configuration
-    session = requests.Session()
-    adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
-    session.mount("http://", adapter)
-    session.mount("https://", adapter)
-    # Timeout and retry config
-    num_var = max(target_url.count("var="), 1)
-    max_timeout = 5 * 30 * num_var
-    order_terms = ["1st", "2nd", "3rd"]
-    download_start = datetime.datetime.now()
-    max_attempts = 5
-    for attempt in range(max_attempts):
-        if attempt > 0:
-            retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
-            print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
-            time.sleep(2 + random.uniform(0, 2))
-        timeout = random.randint(max_timeout // 5, max_timeout)
-        print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
-        try:
-            headers = {"User-Agent": get_ua()}
-            with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
-                response.raise_for_status()
-                print(f"[bold #96cbd7]Downloading {file_name} ...")
-                with open(fname, "wb") as f:
-                    for chunk in response.iter_content(chunk_size=32 * 1024):
-                        if chunk:
+def setup_logger(level=logging.INFO):
+    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
+class _HycomDownloader:
+    def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
+        self.tasks = tasks
+        self.delay_range = delay_range
+        self.timeout_factor = timeout_factor
+        self.max_var_count = max_var_count
+        self.max_retries = max_retries
+        self.count = {"success": 0, "fail": 0}
+        setup_logger()
+    def user_agent(self):
+        return get_ua()
+    async def _download_one(self, url, save_path):
+        file_name = os.path.basename(save_path)
+        headers = {"User-Agent": self.user_agent()}
+        var_count = min(max(url.count("var="), 1), self.max_var_count)
+        timeout_max = self.timeout_factor * var_count
+        retry = 0
+        while retry <= self.max_retries:
+            timeout = random.randint(timeout_max // 2, timeout_max)
+            try:
+                await asyncio.sleep(random.uniform(*self.delay_range))
+                start = datetime.datetime.now()
+                async with httpx.AsyncClient(
+                    timeout=httpx.Timeout(timeout),
+                    limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
+                    transport=httpx.AsyncHTTPTransport(retries=2),
+                ) as client:
+                    logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
+                    response = await client.get(url, headers=headers, follow_redirects=True)
+                    response.raise_for_status()
+                    if not response.content:
+                        raise ValueError("Empty response received")
+                    logging.info(f"Downloading {file_name} ...")
+                    with open(save_path, "wb") as f:
+                        total = int(response.headers.get("Content-Length", 0))
+                        downloaded = 0
+                        last_percent = -1
+                        async for chunk in response.aiter_bytes(32 * 1024):
                             f.write(chunk)
-            elapsed = datetime.datetime.now() - download_start
-            print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
-            count_dict["success"] += 1
-            return
-        except Exception as e:
-            if hasattr(e, "response") and getattr(e.response, "status_code", None):
-                err_msg = f"HTTP {e.response.status_code} Error"
-            elif isinstance(e, requests.exceptions.Timeout):
-                err_msg = "Timeout Error"
-            elif isinstance(e, requests.exceptions.ConnectionError):
-                err_msg = "Connection Error"
-            elif isinstance(e, requests.exceptions.RequestException):
-                err_msg = "Request Error"
-            else:
-                err_msg = "Unexpected Error"
-            print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
-    print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
-    count_dict["fail"] += 1
-def _download_within_python(file_name, target_url, fname):
-    print(f"[bold #f0f6d0]Requesting {file_name} ...")
-    # 创建 httpx 同步客户端
-    limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
-    transport = httpx.HTTPTransport(retries=3)
-    client = httpx.Client(limits=limits, transport=transport, timeout=None)
-    num_var = max(target_url.count("var="), 1)
-    max_timeout = 5 * 30 * num_var
-    timeout = random.randint(max_timeout // 2, max_timeout)
-    download_start = datetime.datetime.now()
+                            downloaded += len(chunk)
+                            if total > 0:
+                                percent = int(downloaded * 100 / total)
+                                if percent != last_percent:
+                                    logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
+                                    last_percent = percent
+                    elapsed = datetime.datetime.now() - start
+                    # logging.info(f"File {file_name} downloaded, Time: {elapsed}")
+                    logging.info(f"Saving {file_name}, Time: {elapsed}")
+                    self.count["success"] += 1
+                    count_dict["success"] += 1
+                    return
+            except Exception as e:
+                logging.error(f"Failed ({type(e).__name__}): {e}")
+                if retry < self.max_retries:
+                    backoff = 2**retry
+                    logging.warning(f"Retrying in {backoff:.1f}s ...")
+                    await asyncio.sleep(backoff)
+                    retry += 1
+                else:
+                    logging.error(f"Giving up on {file_name}")
+                    self.count["fail"] += 1
+                    count_dict["fail"] += 1
+                    return
-    print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
-    headers = {"User-Agent": get_ua()}
+    async def run(self):
+        logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
+        for url, save_path in self.tasks:
+            await self._download_one(url, save_path)
-    try:
-        response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
-        response.raise_for_status()
-        print(f"[bold #96cbd7]Downloading {file_name} ...")
-        with open(fname, "wb") as f:
-            for chunk in response.iter_bytes(32 * 1024):
-                if chunk:
-                    f.write(chunk)
-        elapsed = datetime.datetime.now() - download_start
-        print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
-        count_dict["success"] += 1
-    except Exception as e:
-        err_type = type(e).__name__
-        print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
-        print(f"[bold #ffe5c0]Target URL: \n{target_url}")
-        count_dict["fail"] += 1
-    finally:
-        client.close()
+        logging.info("✅ All tasks completed.")
+        logging.info(f"✔️  Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
 def _download_file(target_url, store_path, file_name, cover=False):
-    fname = Path(store_path) / file_name
+    save_path = Path(store_path) / file_name
     file_name_split = file_name.split("_")
     file_name_split = file_name_split[:-1]
     same_file = "_".join(file_name_split) + "*nc"
     if match_time is not None:
-        if check_nc(fname, print_messages=False):
-            if not _check_ftime(fname, if_print=True):
+        if check_nc(save_path, print_messages=False):
+            if not _check_ftime(save_path, if_print=True):
                 if match_time:
-                    _correct_time(fname)
+                    _correct_time(save_path)
                     count_dict["skip"] += 1
                 else:
-                    _clear_existing_file(fname)
+                    _clear_existing_file(save_path)
                     count_dict["no_data"] += 1
             else:
                 count_dict["skip"] += 1
                 print(f"[bold green]{file_name} is correct")
         return
-    if not cover and os.path.exists(fname):
-        print(f"[bold #FFA54F]{fname} exists, skipping ...")
+    if not cover and os.path.exists(save_path):
+        print(f"[bold #FFA54F]{save_path} exists, skipping ...")
         count_dict["skip"] += 1
         return
     if same_file not in fsize_dict.keys():
-        check_nc(fname, delete_if_invalid=True, print_messages=False)
+        check_nc(save_path, delete_if_invalid=True, print_messages=False)
-    get_mean_size = _get_mean_size_move(same_file, fname)
+    get_mean_size = _get_mean_size_move(same_file, save_path)
-    if _check_existing_file(fname, get_mean_size):
+    if _check_existing_file(save_path, get_mean_size):
         count_dict["skip"] += 1
         return
-    _clear_existing_file(fname)
+    _clear_existing_file(save_path)
     if not use_idm:
-        _download_within_python(file_name, target_url, fname)
+        python_downloader = _HycomDownloader([(target_url, save_path)])
+        asyncio.run(python_downloader.run())
+        time.sleep(3 + random.uniform(0, 10))
     else:
         idm_downloader(target_url, store_path, file_name, given_idm_engine)
-        idm_download_list.append(fname)
-        print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submit to IDM for downloading")
+        idm_download_list.append(save_path)
+        # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
+        time.sleep(3 + random.uniform(0, 10))
 def _check_hour_is_valid(ymdh_str):
@@ -890,7 +890,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
         else:
             if download_time < "2024081012":
                 varlist = [_ for _ in var]
-                for key, value in var_group.items():
+                for key, value in pbar(var_group.items(), description=f"Var_group {download_time} ->", total=len(var_group), cmap="bwr", next_line=True):
                     current_group = []
                     for v in varlist:
                         if v in value:
@@ -912,7 +912,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
                             file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
                     _download_file(submit_url, store_path, file_name, cover)
             else:
-                for v in var:
+                for v in pbar(var,description=f'Var {download_time} ->', total=len(var), cmap='bwr', next_line=True):
                     submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                     if download_time_end is not None:
@@ -946,7 +946,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
     if num_workers is not None and num_workers > 1:
         global parallel_counter
         parallel_counter = 0
-        counter_lock = Lock()
+        counter_lock = Lock()  # noqa: F841
     if ymdh_time_s == ymdh_time_e:
         _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
     elif int(ymdh_time_s) < int(ymdh_time_e):
@@ -954,17 +954,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
             print("*" * mark_len)
             print("Downloading a series of files...")
         time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
-        with Progress() as progress:
-            task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
-            if num_workers is None or num_workers <= 1:
-                for i, time_str in enumerate(time_list):
-                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
-                    progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
-            else:
-                with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
-                    for feature in as_completed(futures):
-                        _done_callback(feature, progress, task, len(time_list), counter_lock)
+        # with Progress() as progress:
+            # task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
+        if num_workers is None or num_workers <= 1:
+            for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
+                _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
+                # progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
+        else:
+            with ThreadPoolExecutor(max_workers=num_workers) as executor:
+                futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
+                """ for feature in as_completed(futures):
+                    _done_callback(feature, progress, task, len(time_list), counter_lock) """
+                for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
+                    pass
     else:
         print("[bold red]Please ensure the time_s is no more than time_e")
@@ -1048,7 +1050,6 @@ def download(
         interval_hours=3,
         )
     """
-    from oafuncs.oa_tool import pbar
     _get_initial_data()
@@ -1109,10 +1110,10 @@ def download(
         workers = 1
         given_idm_engine = idm_path
         idm_download_list = []
-        bar_desc = "Submitting to IDM ..."
+        bar_desc = "Submitting to IDM ->"
     else:
         use_idm = False
-        bar_desc = "Downloading ..."
+        bar_desc = "Downloading ->"
     global match_time
     match_time = validate_time
@@ -1124,7 +1125,7 @@ def download(
         workers = 1
         print("*" * mark_len)
         print("[bold red]Only checking the time of existing files.")
-        bar_desc = "Checking time ..."
+        bar_desc = "Checking time ->"
     _download_hourly_func(
         variables,
@@ -1150,7 +1151,7 @@ def download(
         print("[bold #ecdbfe]*" * mark_len)
         if idm_download_list:
             remain_list = idm_download_list.copy()
-            for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
+            for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
                 success = False
                 while not success:
                     for f in remain_list:
@@ -1193,7 +1194,7 @@ if __name__ == "__main__":
     options = {
         "variables": var_list,
         "start_time": "2018010100",
-        "end_time": "2021010100",
+        "end_time": "2019063000",
         "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
         "lon_min": 105,
         "lon_max": 135,
@@ -1206,6 +1207,7 @@ if __name__ == "__main__":
         "validate_time": None,
         # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
         "interval_hours": 24,
+        "proxy_txt": None,
     }
     if single_var:

oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl

oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl