oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
13
13
  Python Version: 3.12
14
14
  """
15
15
 
16
+ import asyncio
16
17
  import datetime
18
+ import logging
17
19
  import os
18
20
  import random
19
21
  import re
@@ -22,17 +24,15 @@ import warnings
22
24
  from concurrent.futures import ThreadPoolExecutor, as_completed
23
25
  from pathlib import Path
24
26
  from threading import Lock
27
+ from oafuncs.oa_tool import pbar
25
28
 
29
+ import httpx
26
30
  import matplotlib.pyplot as plt
27
31
  import netCDF4 as nc
28
32
  import numpy as np
29
33
  import pandas as pd
30
- import requests
31
- from requests.adapters import HTTPAdapter
32
- import httpx
33
34
  import xarray as xr
34
35
  from rich import print
35
- from rich.progress import Progress
36
36
 
37
37
  from oafuncs.oa_down.idm import downloader as idm_downloader
38
38
  from oafuncs.oa_down.user_agent import get_ua
@@ -40,6 +40,9 @@ from oafuncs.oa_file import file_size
40
40
  from oafuncs.oa_nc import check as check_nc
41
41
  from oafuncs.oa_nc import modify as modify_nc
42
42
 
43
+ logging.getLogger("httpx").setLevel(logging.WARNING) # 关闭 httpx 的 INFO 日志,只显示 WARNING 及以上
44
+
45
+
43
46
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
44
47
 
45
48
  __all__ = ["draw_time_range", "download"]
@@ -416,11 +419,9 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
416
419
  trange_list.append(f"{time_s}-{time_e}")
417
420
  have_data = True
418
421
 
419
- # 输出结果
420
- if match_time is None:
421
- print(f"[bold red]{time_input_str} is in the following dataset and version:")
422
422
  if have_data:
423
423
  if match_time is None:
424
+ print(f"[bold red]Time {time_input_str} included in:")
424
425
  dv_num = 1
425
426
  for d, v, trange in zip(d_list, v_list, trange_list):
426
427
  print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
@@ -436,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
436
437
  print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
437
438
  return False
438
439
  else:
439
- print(f"[bold red]{time_input_str} is not in any dataset and version")
440
+ print(f"[bold red]Time {time_input_str} has no data")
440
441
  return False
441
442
 
442
443
 
@@ -511,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
511
512
 
512
513
  if dataset_name_out is not None and version_name_out is not None:
513
514
  if match_time is None:
514
- print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
515
+ # print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
516
+ print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
515
517
 
516
518
  # 如果没有找到匹配的数据集和版本,会返回 None
517
519
  return dataset_name_out, version_name_out
@@ -666,140 +668,138 @@ def _correct_time(nc_file):
666
668
  modify_nc(nc_file, "time", None, time_difference)
667
669
 
668
670
 
669
- def _download_within_python_requests(file_name, target_url, fname):
670
- print(f"[bold #f0f6d0]Requesting {file_name} ...")
671
-
672
- # Session configuration
673
- session = requests.Session()
674
- adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
675
- session.mount("http://", adapter)
676
- session.mount("https://", adapter)
677
-
678
- # Timeout and retry config
679
- num_var = max(target_url.count("var="), 1)
680
- max_timeout = 5 * 30 * num_var
681
- order_terms = ["1st", "2nd", "3rd"]
682
- download_start = datetime.datetime.now()
683
- max_attempts = 5
684
-
685
- for attempt in range(max_attempts):
686
- if attempt > 0:
687
- retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
688
- print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
689
- time.sleep(2 + random.uniform(0, 2))
690
-
691
- timeout = random.randint(max_timeout // 5, max_timeout)
692
- print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
693
-
694
- try:
695
- headers = {"User-Agent": get_ua()}
696
- with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
697
- response.raise_for_status()
698
- print(f"[bold #96cbd7]Downloading {file_name} ...")
699
- with open(fname, "wb") as f:
700
- for chunk in response.iter_content(chunk_size=32 * 1024):
701
- if chunk:
671
+ def setup_logger(level=logging.INFO):
672
+ logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
673
+
674
+
675
+ class _HycomDownloader:
676
+ def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
677
+ self.tasks = tasks
678
+ self.delay_range = delay_range
679
+ self.timeout_factor = timeout_factor
680
+ self.max_var_count = max_var_count
681
+ self.max_retries = max_retries
682
+ self.count = {"success": 0, "fail": 0}
683
+ setup_logger()
684
+
685
+ def user_agent(self):
686
+ return get_ua()
687
+
688
+ async def _download_one(self, url, save_path):
689
+ file_name = os.path.basename(save_path)
690
+ headers = {"User-Agent": self.user_agent()}
691
+ var_count = min(max(url.count("var="), 1), self.max_var_count)
692
+ timeout_max = self.timeout_factor * var_count
693
+
694
+ retry = 0
695
+ while retry <= self.max_retries:
696
+ timeout = random.randint(timeout_max // 2, timeout_max)
697
+ try:
698
+ await asyncio.sleep(random.uniform(*self.delay_range))
699
+ start = datetime.datetime.now()
700
+
701
+ async with httpx.AsyncClient(
702
+ timeout=httpx.Timeout(timeout),
703
+ limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
704
+ transport=httpx.AsyncHTTPTransport(retries=2),
705
+ ) as client:
706
+ logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
707
+ response = await client.get(url, headers=headers, follow_redirects=True)
708
+ response.raise_for_status()
709
+ if not response.content:
710
+ raise ValueError("Empty response received")
711
+
712
+ logging.info(f"Downloading {file_name} ...")
713
+ with open(save_path, "wb") as f:
714
+ total = int(response.headers.get("Content-Length", 0))
715
+ downloaded = 0
716
+ last_percent = -1
717
+
718
+ async for chunk in response.aiter_bytes(32 * 1024):
702
719
  f.write(chunk)
703
- elapsed = datetime.datetime.now() - download_start
704
- print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
705
- count_dict["success"] += 1
706
- return
707
- except Exception as e:
708
- if hasattr(e, "response") and getattr(e.response, "status_code", None):
709
- err_msg = f"HTTP {e.response.status_code} Error"
710
- elif isinstance(e, requests.exceptions.Timeout):
711
- err_msg = "Timeout Error"
712
- elif isinstance(e, requests.exceptions.ConnectionError):
713
- err_msg = "Connection Error"
714
- elif isinstance(e, requests.exceptions.RequestException):
715
- err_msg = "Request Error"
716
- else:
717
- err_msg = "Unexpected Error"
718
- print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
719
-
720
- print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
721
- count_dict["fail"] += 1
722
-
723
-
724
-
725
- def _download_within_python(file_name, target_url, fname):
726
- print(f"[bold #f0f6d0]Requesting {file_name} ...")
727
-
728
- # 创建 httpx 同步客户端
729
- limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
730
- transport = httpx.HTTPTransport(retries=3)
731
- client = httpx.Client(limits=limits, transport=transport, timeout=None)
732
-
733
- num_var = max(target_url.count("var="), 1)
734
- max_timeout = 5 * 30 * num_var
735
- timeout = random.randint(max_timeout // 2, max_timeout)
736
- download_start = datetime.datetime.now()
720
+ downloaded += len(chunk)
721
+
722
+ if total > 0:
723
+ percent = int(downloaded * 100 / total)
724
+ if percent != last_percent:
725
+ logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
726
+ last_percent = percent
727
+
728
+
729
+ elapsed = datetime.datetime.now() - start
730
+ # logging.info(f"File {file_name} downloaded, Time: {elapsed}")
731
+ logging.info(f"Saving {file_name}, Time: {elapsed}")
732
+ self.count["success"] += 1
733
+ count_dict["success"] += 1
734
+ return
735
+
736
+ except Exception as e:
737
+ logging.error(f"Failed ({type(e).__name__}): {e}")
738
+ if retry < self.max_retries:
739
+ backoff = 2**retry
740
+ logging.warning(f"Retrying in {backoff:.1f}s ...")
741
+ await asyncio.sleep(backoff)
742
+ retry += 1
743
+ else:
744
+ logging.error(f"Giving up on {file_name}")
745
+ self.count["fail"] += 1
746
+ count_dict["fail"] += 1
747
+ return
737
748
 
738
- print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
739
- headers = {"User-Agent": get_ua()}
749
+ async def run(self):
750
+ logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
751
+ for url, save_path in self.tasks:
752
+ await self._download_one(url, save_path)
740
753
 
741
- try:
742
- response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
743
- response.raise_for_status()
744
- print(f"[bold #96cbd7]Downloading {file_name} ...")
745
- with open(fname, "wb") as f:
746
- for chunk in response.iter_bytes(32 * 1024):
747
- if chunk:
748
- f.write(chunk)
749
- elapsed = datetime.datetime.now() - download_start
750
- print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
751
- count_dict["success"] += 1
752
- except Exception as e:
753
- err_type = type(e).__name__
754
- print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
755
- print(f"[bold #ffe5c0]Target URL: \n{target_url}")
756
- count_dict["fail"] += 1
757
- finally:
758
- client.close()
754
+ logging.info("✅ All tasks completed.")
755
+ logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
759
756
 
760
757
 
761
758
  def _download_file(target_url, store_path, file_name, cover=False):
762
- fname = Path(store_path) / file_name
759
+ save_path = Path(store_path) / file_name
763
760
  file_name_split = file_name.split("_")
764
761
  file_name_split = file_name_split[:-1]
765
762
  same_file = "_".join(file_name_split) + "*nc"
766
763
 
767
764
  if match_time is not None:
768
- if check_nc(fname, print_messages=False):
769
- if not _check_ftime(fname, if_print=True):
765
+ if check_nc(save_path, print_messages=False):
766
+ if not _check_ftime(save_path, if_print=True):
770
767
  if match_time:
771
- _correct_time(fname)
768
+ _correct_time(save_path)
772
769
  count_dict["skip"] += 1
773
770
  else:
774
- _clear_existing_file(fname)
771
+ _clear_existing_file(save_path)
775
772
  count_dict["no_data"] += 1
776
773
  else:
777
774
  count_dict["skip"] += 1
778
775
  print(f"[bold green]{file_name} is correct")
779
776
  return
780
777
 
781
- if not cover and os.path.exists(fname):
782
- print(f"[bold #FFA54F]{fname} exists, skipping ...")
778
+ if not cover and os.path.exists(save_path):
779
+ print(f"[bold #FFA54F]{save_path} exists, skipping ...")
783
780
  count_dict["skip"] += 1
784
781
  return
785
782
 
786
783
  if same_file not in fsize_dict.keys():
787
- check_nc(fname, delete_if_invalid=True, print_messages=False)
784
+ check_nc(save_path, delete_if_invalid=True, print_messages=False)
788
785
 
789
- get_mean_size = _get_mean_size_move(same_file, fname)
786
+ get_mean_size = _get_mean_size_move(same_file, save_path)
790
787
 
791
- if _check_existing_file(fname, get_mean_size):
788
+ if _check_existing_file(save_path, get_mean_size):
792
789
  count_dict["skip"] += 1
793
790
  return
794
791
 
795
- _clear_existing_file(fname)
792
+ _clear_existing_file(save_path)
796
793
 
797
794
  if not use_idm:
798
- _download_within_python(file_name, target_url, fname)
795
+ python_downloader = _HycomDownloader([(target_url, save_path)])
796
+ asyncio.run(python_downloader.run())
797
+ time.sleep(3 + random.uniform(0, 10))
799
798
  else:
800
799
  idm_downloader(target_url, store_path, file_name, given_idm_engine)
801
- idm_download_list.append(fname)
802
- print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submit to IDM for downloading")
800
+ idm_download_list.append(save_path)
801
+ # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
802
+ time.sleep(3 + random.uniform(0, 10))
803
803
 
804
804
 
805
805
  def _check_hour_is_valid(ymdh_str):
@@ -890,7 +890,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
890
890
  else:
891
891
  if download_time < "2024081012":
892
892
  varlist = [_ for _ in var]
893
- for key, value in var_group.items():
893
+ for key, value in pbar(var_group.items(), description=f"Var_group {download_time} ->", total=len(var_group), cmap="bwr", next_line=True):
894
894
  current_group = []
895
895
  for v in varlist:
896
896
  if v in value:
@@ -912,7 +912,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
912
912
  file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
913
913
  _download_file(submit_url, store_path, file_name, cover)
914
914
  else:
915
- for v in var:
915
+ for v in pbar(var,description=f'Var {download_time} ->', total=len(var), cmap='bwr', next_line=True):
916
916
  submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
917
917
  file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
918
918
  if download_time_end is not None:
@@ -946,7 +946,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
946
946
  if num_workers is not None and num_workers > 1:
947
947
  global parallel_counter
948
948
  parallel_counter = 0
949
- counter_lock = Lock()
949
+ counter_lock = Lock() # noqa: F841
950
950
  if ymdh_time_s == ymdh_time_e:
951
951
  _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
952
952
  elif int(ymdh_time_s) < int(ymdh_time_e):
@@ -954,17 +954,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
954
954
  print("*" * mark_len)
955
955
  print("Downloading a series of files...")
956
956
  time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
957
- with Progress() as progress:
958
- task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
959
- if num_workers is None or num_workers <= 1:
960
- for i, time_str in enumerate(time_list):
961
- _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
962
- progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
963
- else:
964
- with ThreadPoolExecutor(max_workers=num_workers) as executor:
965
- futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
966
- for feature in as_completed(futures):
967
- _done_callback(feature, progress, task, len(time_list), counter_lock)
957
+ # with Progress() as progress:
958
+ # task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
959
+ if num_workers is None or num_workers <= 1:
960
+ for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
961
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
962
+ # progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
963
+ else:
964
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
965
+ futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
966
+ """ for feature in as_completed(futures):
967
+ _done_callback(feature, progress, task, len(time_list), counter_lock) """
968
+ for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
969
+ pass
968
970
  else:
969
971
  print("[bold red]Please ensure the time_s is no more than time_e")
970
972
 
@@ -1048,7 +1050,6 @@ def download(
1048
1050
  interval_hours=3,
1049
1051
  )
1050
1052
  """
1051
- from oafuncs.oa_tool import pbar
1052
1053
 
1053
1054
  _get_initial_data()
1054
1055
 
@@ -1109,10 +1110,10 @@ def download(
1109
1110
  workers = 1
1110
1111
  given_idm_engine = idm_path
1111
1112
  idm_download_list = []
1112
- bar_desc = "Submitting to IDM ..."
1113
+ bar_desc = "Submitting to IDM ->"
1113
1114
  else:
1114
1115
  use_idm = False
1115
- bar_desc = "Downloading ..."
1116
+ bar_desc = "Downloading ->"
1116
1117
 
1117
1118
  global match_time
1118
1119
  match_time = validate_time
@@ -1124,7 +1125,7 @@ def download(
1124
1125
  workers = 1
1125
1126
  print("*" * mark_len)
1126
1127
  print("[bold red]Only checking the time of existing files.")
1127
- bar_desc = "Checking time ..."
1128
+ bar_desc = "Checking time ->"
1128
1129
 
1129
1130
  _download_hourly_func(
1130
1131
  variables,
@@ -1150,7 +1151,7 @@ def download(
1150
1151
  print("[bold #ecdbfe]*" * mark_len)
1151
1152
  if idm_download_list:
1152
1153
  remain_list = idm_download_list.copy()
1153
- for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
1154
+ for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
1154
1155
  success = False
1155
1156
  while not success:
1156
1157
  for f in remain_list:
@@ -1193,7 +1194,7 @@ if __name__ == "__main__":
1193
1194
  options = {
1194
1195
  "variables": var_list,
1195
1196
  "start_time": "2018010100",
1196
- "end_time": "2021010100",
1197
+ "end_time": "2019063000",
1197
1198
  "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
1198
1199
  "lon_min": 105,
1199
1200
  "lon_max": 135,
@@ -1206,6 +1207,7 @@ if __name__ == "__main__":
1206
1207
  "validate_time": None,
1207
1208
  # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
1208
1209
  "interval_hours": 24,
1210
+ "proxy_txt": None,
1209
1211
  }
1210
1212
 
1211
1213
  if single_var: