oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.5__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
13
13
  Python Version: 3.12
14
14
  """
15
15
 
16
+ import asyncio
16
17
  import datetime
18
+ import logging
17
19
  import os
18
20
  import random
19
21
  import re
@@ -23,13 +25,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
23
25
  from pathlib import Path
24
26
  from threading import Lock
25
27
 
28
+ import httpx
26
29
  import matplotlib.pyplot as plt
27
30
  import netCDF4 as nc
28
31
  import numpy as np
29
32
  import pandas as pd
30
- import requests
31
- from requests.adapters import HTTPAdapter
32
- import httpx
33
33
  import xarray as xr
34
34
  from rich import print
35
35
  from rich.progress import Progress
@@ -40,6 +40,9 @@ from oafuncs.oa_file import file_size
40
40
  from oafuncs.oa_nc import check as check_nc
41
41
  from oafuncs.oa_nc import modify as modify_nc
42
42
 
43
+ logging.getLogger("httpx").setLevel(logging.WARNING) # 关闭 httpx 的 INFO 日志,只显示 WARNING 及以上
44
+
45
+
43
46
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
44
47
 
45
48
  __all__ = ["draw_time_range", "download"]
@@ -416,11 +419,9 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
416
419
  trange_list.append(f"{time_s}-{time_e}")
417
420
  have_data = True
418
421
 
419
- # 输出结果
420
- if match_time is None:
421
- print(f"[bold red]{time_input_str} is in the following dataset and version:")
422
422
  if have_data:
423
423
  if match_time is None:
424
+ print(f"[bold red]Time {time_input_str} included in:")
424
425
  dv_num = 1
425
426
  for d, v, trange in zip(d_list, v_list, trange_list):
426
427
  print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
@@ -436,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
436
437
  print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
437
438
  return False
438
439
  else:
439
- print(f"[bold red]{time_input_str} is not in any dataset and version")
440
+ print(f"[bold red]Time {time_input_str} has no data")
440
441
  return False
441
442
 
442
443
 
@@ -511,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
511
512
 
512
513
  if dataset_name_out is not None and version_name_out is not None:
513
514
  if match_time is None:
514
- print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
515
+ # print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
516
+ print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
515
517
 
516
518
  # 如果没有找到匹配的数据集和版本,会返回 None
517
519
  return dataset_name_out, version_name_out
@@ -666,140 +668,122 @@ def _correct_time(nc_file):
666
668
  modify_nc(nc_file, "time", None, time_difference)
667
669
 
668
670
 
669
- def _download_within_python_requests(file_name, target_url, fname):
670
- print(f"[bold #f0f6d0]Requesting {file_name} ...")
671
-
672
- # Session configuration
673
- session = requests.Session()
674
- adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
675
- session.mount("http://", adapter)
676
- session.mount("https://", adapter)
677
-
678
- # Timeout and retry config
679
- num_var = max(target_url.count("var="), 1)
680
- max_timeout = 5 * 30 * num_var
681
- order_terms = ["1st", "2nd", "3rd"]
682
- download_start = datetime.datetime.now()
683
- max_attempts = 5
684
-
685
- for attempt in range(max_attempts):
686
- if attempt > 0:
687
- retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
688
- print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
689
- time.sleep(2 + random.uniform(0, 2))
690
-
691
- timeout = random.randint(max_timeout // 5, max_timeout)
692
- print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
693
-
694
- try:
695
- headers = {"User-Agent": get_ua()}
696
- with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
697
- response.raise_for_status()
698
- print(f"[bold #96cbd7]Downloading {file_name} ...")
699
- with open(fname, "wb") as f:
700
- for chunk in response.iter_content(chunk_size=32 * 1024):
701
- if chunk:
671
+ class _HycomDownloader:
672
+ def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
673
+ """
674
+ :param tasks: List of (url, save_path)
675
+ """
676
+ self.tasks = tasks
677
+ self.delay_range = delay_range
678
+ self.timeout_factor = timeout_factor
679
+ self.max_var_count = max_var_count
680
+ self.max_retries = max_retries
681
+ self.count = {"success": 0, "fail": 0}
682
+
683
+ def user_agent(self):
684
+ return get_ua()
685
+
686
+ async def _download_one(self, url, save_path):
687
+ file_name = os.path.basename(save_path)
688
+ headers = {"User-Agent": self.user_agent()}
689
+ var_count = min(max(url.count("var="), 1), self.max_var_count)
690
+ timeout_max = self.timeout_factor * var_count
691
+ timeout = random.randint(timeout_max // 2, timeout_max)
692
+
693
+ retry = 0
694
+ while retry <= self.max_retries:
695
+ try:
696
+ await asyncio.sleep(random.uniform(*self.delay_range))
697
+ start = datetime.datetime.now()
698
+ async with httpx.AsyncClient(
699
+ timeout=httpx.Timeout(timeout),
700
+ limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
701
+ transport=httpx.AsyncHTTPTransport(retries=2),
702
+ ) as client:
703
+ print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
704
+ response = await client.get(url, headers=headers, follow_redirects=True)
705
+ response.raise_for_status()
706
+ if not response.content:
707
+ raise ValueError("Empty response received")
708
+
709
+ print(f"[bold #96cbd7]Downloading {file_name} ...")
710
+ with open(save_path, "wb") as f:
711
+ async for chunk in response.aiter_bytes(32 * 1024):
702
712
  f.write(chunk)
703
- elapsed = datetime.datetime.now() - download_start
704
- print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
705
- count_dict["success"] += 1
706
- return
707
- except Exception as e:
708
- if hasattr(e, "response") and getattr(e.response, "status_code", None):
709
- err_msg = f"HTTP {e.response.status_code} Error"
710
- elif isinstance(e, requests.exceptions.Timeout):
711
- err_msg = "Timeout Error"
712
- elif isinstance(e, requests.exceptions.ConnectionError):
713
- err_msg = "Connection Error"
714
- elif isinstance(e, requests.exceptions.RequestException):
715
- err_msg = "Request Error"
716
- else:
717
- err_msg = "Unexpected Error"
718
- print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
719
-
720
- print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
721
- count_dict["fail"] += 1
722
-
723
713
 
714
+ elapsed = datetime.datetime.now() - start
715
+ print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
716
+ self.count["success"] += 1
717
+ count_dict["success"] += 1
718
+ return
719
+
720
+ except Exception as e:
721
+ print(f"[bold red]Failed ({type(e).__name__}): {e}")
722
+ if retry < self.max_retries:
723
+ backoff = 2**retry
724
+ print(f"[yellow]Retrying in {backoff:.1f}s ...")
725
+ await asyncio.sleep(backoff)
726
+ retry += 1
727
+ else:
728
+ print(f"[red]Giving up on {file_name}")
729
+ self.count["fail"] += 1
730
+ count_dict["fail"] += 1
731
+ return
724
732
 
725
- def _download_within_python(file_name, target_url, fname):
726
- print(f"[bold #f0f6d0]Requesting {file_name} ...")
727
-
728
- # 创建 httpx 同步客户端
729
- limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
730
- transport = httpx.HTTPTransport(retries=3)
731
- client = httpx.Client(limits=limits, transport=transport, timeout=None)
732
-
733
- num_var = max(target_url.count("var="), 1)
734
- max_timeout = 5 * 30 * num_var
735
- timeout = random.randint(max_timeout // 2, max_timeout)
736
- download_start = datetime.datetime.now()
737
-
738
- print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
739
- headers = {"User-Agent": get_ua()}
733
+ async def run(self):
734
+ print(f"📥 Starting download of {len(self.tasks)} files ...")
735
+ for url, save_path in self.tasks:
736
+ await self._download_one(url, save_path)
740
737
 
741
- try:
742
- response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
743
- response.raise_for_status()
744
- print(f"[bold #96cbd7]Downloading {file_name} ...")
745
- with open(fname, "wb") as f:
746
- for chunk in response.iter_bytes(32 * 1024):
747
- if chunk:
748
- f.write(chunk)
749
- elapsed = datetime.datetime.now() - download_start
750
- print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
751
- count_dict["success"] += 1
752
- except Exception as e:
753
- err_type = type(e).__name__
754
- print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
755
- print(f"[bold #ffe5c0]Target URL: \n{target_url}")
756
- count_dict["fail"] += 1
757
- finally:
758
- client.close()
738
+ print("\n✅ All tasks completed.")
739
+ print(f"✔️ Success: {self.count['success']} | Fail: {self.count['fail']}")
759
740
 
760
741
 
761
742
  def _download_file(target_url, store_path, file_name, cover=False):
762
- fname = Path(store_path) / file_name
743
+ save_path = Path(store_path) / file_name
763
744
  file_name_split = file_name.split("_")
764
745
  file_name_split = file_name_split[:-1]
765
746
  same_file = "_".join(file_name_split) + "*nc"
766
747
 
767
748
  if match_time is not None:
768
- if check_nc(fname, print_messages=False):
769
- if not _check_ftime(fname, if_print=True):
749
+ if check_nc(save_path, print_messages=False):
750
+ if not _check_ftime(save_path, if_print=True):
770
751
  if match_time:
771
- _correct_time(fname)
752
+ _correct_time(save_path)
772
753
  count_dict["skip"] += 1
773
754
  else:
774
- _clear_existing_file(fname)
755
+ _clear_existing_file(save_path)
775
756
  count_dict["no_data"] += 1
776
757
  else:
777
758
  count_dict["skip"] += 1
778
759
  print(f"[bold green]{file_name} is correct")
779
760
  return
780
761
 
781
- if not cover and os.path.exists(fname):
782
- print(f"[bold #FFA54F]{fname} exists, skipping ...")
762
+ if not cover and os.path.exists(save_path):
763
+ print(f"[bold #FFA54F]{save_path} exists, skipping ...")
783
764
  count_dict["skip"] += 1
784
765
  return
785
766
 
786
767
  if same_file not in fsize_dict.keys():
787
- check_nc(fname, delete_if_invalid=True, print_messages=False)
768
+ check_nc(save_path, delete_if_invalid=True, print_messages=False)
788
769
 
789
- get_mean_size = _get_mean_size_move(same_file, fname)
770
+ get_mean_size = _get_mean_size_move(same_file, save_path)
790
771
 
791
- if _check_existing_file(fname, get_mean_size):
772
+ if _check_existing_file(save_path, get_mean_size):
792
773
  count_dict["skip"] += 1
793
774
  return
794
775
 
795
- _clear_existing_file(fname)
776
+ _clear_existing_file(save_path)
796
777
 
797
778
  if not use_idm:
798
- _download_within_python(file_name, target_url, fname)
779
+ python_downloader = _HycomDownloader([(target_url, save_path)])
780
+ asyncio.run(python_downloader.run())
781
+ time.sleep(3 + random.uniform(0, 10))
799
782
  else:
800
783
  idm_downloader(target_url, store_path, file_name, given_idm_engine)
801
- idm_download_list.append(fname)
802
- print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submit to IDM for downloading")
784
+ idm_download_list.append(save_path)
785
+ # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
786
+ time.sleep(3 + random.uniform(0, 10))
803
787
 
804
788
 
805
789
  def _check_hour_is_valid(ymdh_str):
@@ -1193,7 +1177,7 @@ if __name__ == "__main__":
1193
1177
  options = {
1194
1178
  "variables": var_list,
1195
1179
  "start_time": "2018010100",
1196
- "end_time": "2021010100",
1180
+ "end_time": "2019063000",
1197
1181
  "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
1198
1182
  "lon_min": 105,
1199
1183
  "lon_max": 135,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oafuncs
3
- Version: 0.0.98.4
3
+ Version: 0.0.98.5
4
4
  Summary: Oceanic and Atmospheric Functions
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -21,9 +21,7 @@ oafuncs/_script/plot_dataset.py,sha256=zkSEnO_-biyagorwWXPoihts_cwuvripzEt-l9bHJ
21
21
  oafuncs/_script/replace_file_content.py,sha256=eCFZjnZcwyRvy6b4mmIfBna-kylSZTyJRfgXd6DdCjk,5982
22
22
  oafuncs/oa_down/User_Agent-list.txt,sha256=pHaMlElMvZ8TG4vf4BqkZYKqe0JIGkr4kCN0lM1Y9FQ,514295
23
23
  oafuncs/oa_down/__init__.py,sha256=kRX5eTUCbAiz3zTaQM1501paOYS_3fizDN4Pa0mtNUA,585
24
- oafuncs/oa_down/hycom_3hourly.py,sha256=heG4L_eyPsc8pgUASCXYYJEdPGAY7rtRWQsiy-qrSxE,54202
25
- oafuncs/oa_down/hycom_3hourly_20250407.py,sha256=DQd_NmQgmSqu7jsrfpDB7k23mkUEy9kyWs-dLUg7GDw,64472
26
- oafuncs/oa_down/hycom_3hourly_20250416.py,sha256=X_fcV_xeJtYD-PB3GRdFWNHMPOVUYgh17MgWOtrdIbc,53493
24
+ oafuncs/oa_down/hycom_3hourly.py,sha256=ERH24OOT62IiOSk5IiSnqad_c13zL7-uD5lAMdbJDiQ,53576
27
25
  oafuncs/oa_down/idm.py,sha256=4z5IvgfTyIKEI1kOtqXZwN7Jnfjwp6qDBOIoVyOLp0I,1823
28
26
  oafuncs/oa_down/literature.py,sha256=2bF9gSKQbzcci9LcKE81j8JEjIJwON7jbwQB3gDDA3E,11331
29
27
  oafuncs/oa_down/test_ua.py,sha256=l8MCD6yU2W75zRPTDKUZTJhCWNF9lfk-MiSFqAqKH1M,1398
@@ -37,8 +35,8 @@ oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,5
37
35
  oafuncs/oa_sign/meteorological.py,sha256=8091SHo2L8kl4dCFmmSH5NGVHDku5i5lSiLEG5DLnOQ,6489
38
36
  oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
39
37
  oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
40
- oafuncs-0.0.98.4.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
41
- oafuncs-0.0.98.4.dist-info/METADATA,sha256=VWSW69LEhmMu9FHH_e0WcNT-YXme3mdxlNvMedU48tY,4242
42
- oafuncs-0.0.98.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
43
- oafuncs-0.0.98.4.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
44
- oafuncs-0.0.98.4.dist-info/RECORD,,
38
+ oafuncs-0.0.98.5.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
39
+ oafuncs-0.0.98.5.dist-info/METADATA,sha256=YwSyTAse2NhMIfM_22BNOJCDLh8Mgy3X_G5zvtCKyPc,4242
40
+ oafuncs-0.0.98.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
41
+ oafuncs-0.0.98.5.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
42
+ oafuncs-0.0.98.5.dist-info/RECORD,,